Skip to content

Commit 114072a

Browse files
committed
feat: add tests for ConstraintType
1 parent f3186ac commit 114072a

File tree

3 files changed

+353
-10
lines changed

3 files changed

+353
-10
lines changed

src/neo4j_graphrag/experimental/components/schema.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class PropertyType(BaseModel):
7676
]
7777
description: str = ""
7878
required: bool = False
79-
79+
# unique: bool = False
8080
model_config = ConfigDict(
8181
frozen=True,
8282
)
@@ -269,6 +269,16 @@ def validate_constraints_against_node_types(self) -> Self:
269269
raise SchemaValidationError(
270270
f"Constraint references undefined node type: {constraint.node_type}"
271271
)
272+
# Check if property_name exists on the node type (only if additional_properties is False)
273+
node_type = self._node_type_index[constraint.node_type]
274+
if not node_type.additional_properties:
275+
valid_property_names = {p.name for p in node_type.properties}
276+
if constraint.property_name not in valid_property_names:
277+
raise SchemaValidationError(
278+
f"Constraint references undefined property '{constraint.property_name}' "
279+
f"on node type '{constraint.node_type}'. "
280+
f"Valid properties: {valid_property_names}"
281+
)
272282
return self
273283

274284
def node_type_from_label(self, label: str) -> Optional[NodeType]:
@@ -594,7 +604,8 @@ def _filter_relationships_without_labels(
594604
def _filter_invalid_constraints(
595605
self, constraints: List[Dict[str, Any]], node_types: List[Dict[str, Any]]
596606
) -> List[Dict[str, Any]]:
597-
"""Filter out constraints that reference undefined node types or have no property name."""
607+
"""Filter out constraints that reference undefined node types, have no property name,
608+
or reference a property that doesn't exist on the node type."""
598609
if not constraints:
599610
return []
600611

@@ -605,7 +616,16 @@ def _filter_invalid_constraints(
605616
)
606617
return []
607618

608-
valid_node_labels = {node_type.get("label") for node_type in node_types}
619+
# Build a mapping of node_type label -> set of property names
620+
node_type_properties: Dict[str, set[str]] = {}
621+
for node_type_dict in node_types:
622+
label = node_type_dict.get("label")
623+
if label:
624+
properties = node_type_dict.get("properties", [])
625+
property_names = {p.get("name") for p in properties if p.get("name")}
626+
node_type_properties[label] = property_names
627+
628+
valid_node_labels = set(node_type_properties.keys())
609629

610630
filtered_constraints = []
611631
for constraint in constraints:
@@ -617,10 +637,20 @@ def _filter_invalid_constraints(
617637
)
618638
continue
619639
# check if the node_type is valid
620-
if constraint.get("node_type") not in valid_node_labels:
640+
node_type = constraint.get("node_type")
641+
if node_type not in valid_node_labels:
642+
logging.info(
643+
f"Filtering out constraint: {constraint}. "
644+
f"Node type '{node_type}' is not valid. Valid node types: {valid_node_labels}"
645+
)
646+
continue
647+
# check if the property_name exists on the node type
648+
property_name = constraint.get("property_name")
649+
if property_name not in node_type_properties.get(node_type, set()):
621650
logging.info(
622651
f"Filtering out constraint: {constraint}. "
623-
f"Node type '{constraint.get('node_type')}' is not valid. Valid node types: {valid_node_labels}"
652+
f"Property '{property_name}' does not exist on node type '{node_type}'. "
653+
f"Valid properties: {node_type_properties.get(node_type, set())}"
624654
)
625655
continue
626656
filtered_constraints.append(constraint)

src/neo4j_graphrag/generation/prompts.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,12 @@ class SchemaExtractionTemplate(PromptTemplate):
217217
4. Include property definitions only when the type can be confidently inferred, otherwise omit them.
218218
5. When defining patterns, ensure that every node label and relationship label mentioned exists in your lists of node types and relationship types.
219219
6. Do not create node types that aren't clearly mentioned in the text.
220-
7. For each node type, identify a unique identifier property and add it as a UNIQUENESS constraint to the list of constraints.
221-
8. Constraints must reference a node_type label that exists in the list of node types.
222-
9. Each constraint must have a property_name having a name that indicates it is a unique identifier for the node type (e.g., person_id for Person, company_id for Company)
223-
10. Keep your schema minimal and focused on clearly identifiable patterns in the text.
220+
7. Keep your schema minimal and focused on clearly identifiable patterns in the text.
221+
8. UNIQUENESS CONSTRAINTS:
222+
8.1 UNIQUENESS is optional; each node_type may or may not have exactly one uniqueness constraint.
223+
8.2 Only use properties that seem to not have too many missing values in the sample.
224+
8.3 Constraints reference node_types by label and specify which property is unique.
225+
8.4 If a property appears in a uniqueness constraint it MUST also appear in the corresponding node_type as a property.
224226
225227
226228
Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST,
@@ -254,7 +256,7 @@ class SchemaExtractionTemplate(PromptTemplate):
254256
{{
255257
"type": "UNIQUENESS",
256258
"node_type": "Person",
257-
"property_name": "person_id"
259+
"property_name": "name"
258260
}}
259261
...
260262
]

0 commit comments

Comments
 (0)