Skip to content

Commit 1bdebbf

Browse files
committed
Add required property support to SchemaFromTextExtractor
1 parent 9e6b588 commit 1bdebbf

File tree

2 files changed

+77
-2
lines changed

2 files changed

+77
-2
lines changed

src/neo4j_graphrag/experimental/components/schema.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,66 @@ def _filter_invalid_constraints(
666666
filtered_constraints.append(constraint)
667667
return filtered_constraints
668668

669+
def _filter_properties_required_field(
    self, node_types: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Sanitize the 'required' field of every property in ``node_types``.

    After this call, each property dict either carries a real ``bool`` under
    'required' or has no 'required' key at all.

    Coercion rules for a non-boolean 'required' value:

    * strings are stripped and compared case-insensitively:
      "true" / "yes" / "1" become ``True``, "false" / "no" / "0" become ``False``;
    * the integers ``1`` and ``0`` become ``True`` and ``False``;
    * anything else (unrecognized strings, ``None``, other numbers,
      containers, ...) is removed.

    Entries that are not dicts, and 'properties' values that are not lists,
    are skipped untouched.

    Args:
        node_types: Raw node type dicts. They are sanitized in place.

    Returns:
        The same ``node_types`` list that was passed in.
    """
    truthy_strings = ("true", "yes", "1")
    falsy_strings = ("false", "no", "0")

    for node_type in node_types:
        # Malformed entries are not this filter's concern; skipping them
        # avoids an AttributeError on .get() below.
        if not isinstance(node_type, dict):
            continue

        properties = node_type.get("properties")
        if not properties or not isinstance(properties, list):
            continue

        for prop in properties:
            if not isinstance(prop, dict) or "required" not in prop:
                continue

            required_value = prop["required"]

            # Already a valid boolean. This check must come before the int
            # check below because bool is a subclass of int.
            if isinstance(required_value, bool):
                continue

            prop_name = prop.get("name", "unknown")
            node_label = node_type.get("label", "unknown")

            coerced = None
            if isinstance(required_value, str):
                normalized = required_value.strip().lower()
                if normalized in truthy_strings:
                    coerced = True
                elif normalized in falsy_strings:
                    coerced = False
            elif isinstance(required_value, int) and required_value in (0, 1):
                # Numeric 0/1 mirrors the accepted "0"/"1" strings.
                coerced = bool(required_value)

            if coerced is None:
                # Dropping the key covers an explicit None as well.
                # NOTE(review): presumably the downstream model then applies
                # its own default (False) - confirm against the model.
                logging.info(
                    "Removing invalid 'required' value %r (type: %s) "
                    "for property '%s' on node '%s'. Using default (False).",
                    required_value,
                    type(required_value).__name__,
                    prop_name,
                    node_label,
                )
                del prop["required"]
            else:
                prop["required"] = coerced
                logging.info(
                    "Converted 'required' value %r to %s "
                    "for property '%s' on node '%s'",
                    required_value,
                    coerced,
                    prop_name,
                    node_label,
                )

    return node_types
727+
728+
669729
def _clean_json_content(self, content: str) -> str:
670730
content = content.strip()
671731

@@ -746,6 +806,10 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema
746806
extracted_relationship_types
747807
)
748808

809+
extracted_node_types = self._filter_properties_required_field(
810+
extracted_node_types
811+
)
812+
749813
# Filter out invalid patterns before validation
750814
if extracted_patterns:
751815
extracted_patterns = self._filter_invalid_patterns(

src/neo4j_graphrag/generation/prompts.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,12 @@ class SchemaExtractionTemplate(PromptTemplate):
223223
8.2 Only use properties that seem to not have too many missing values in the sample.
224224
8.3 Constraints reference node_types by label and specify which property is unique.
225225
8.4 If a property appears in a uniqueness constraint it MUST also appear in the corresponding node_type as a property.
226-
226+
9. REQUIRED PROPERTIES:
227+
9.1 Mark a property as "required": true if every instance of that node/relationship type MUST have this property (non-nullable).
228+
9.2 Mark a property as "required": false if the property is optional and may be absent on some instances.
229+
9.3 Properties that are identifiers, names, or essential characteristics are typically required.
230+
9.4 Properties that are supplementary information (phone numbers, descriptions, metadata) are typically optional.
231+
9.5 When uncertain, default to "required": false.
227232
228233
Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST,
229234
LOCAL_DATETIME, LOCAL_TIME, POINT, STRING, ZONED_DATETIME, ZONED_TIME.
@@ -236,7 +241,13 @@ class SchemaExtractionTemplate(PromptTemplate):
236241
"properties": [
237242
{{
238243
"name": "name",
239-
"type": "STRING"
244+
"type": "STRING",
245+
"required": true
246+
}},
247+
{{
248+
"name": "email",
249+
"type": "STRING",
250+
"required": false
240251
}}
241252
]
242253
}}

0 commit comments

Comments
 (0)