4949from neo4j_graphrag .schema import get_structured_schema
5050
5151
52+ logger = logging .getLogger (__name__ )
53+
54+
5255class PropertyType (BaseModel ):
5356 """
5457 Represents a property on a node or relationship in the graph.
@@ -622,19 +625,19 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema
622625class SchemaFromExistingGraphExtractor (BaseSchemaBuilder ):
623626 """A class to build a GraphSchema object from an existing graph.
624627
625- Uses the get_structured_schema function to extract existing node labels,
626- relationship types, properties and existence constraints.
628+ Uses the get_structured_schema function to extract existing node labels,
629+ relationship types, properties and existence constraints.
627630
628- By default, the built schema does not allow any additional item (property,
629- node label, relationship type or pattern).
631+ By default, the built schema does not allow any additional item (property,
632+ node label, relationship type or pattern).
630633
631- Args:
632- driver (neo4j.Driver): connection to the neo4j database.
633- additional_properties (bool, default False): see GraphSchema
634- additional_node_types (bool, default False): see GraphSchema
635- additional_relationship_types (bool, default False): see GraphSchema:
636- additional_patterns (bool, default False): see GraphSchema:
637- neo4j_database (Optional | str): name of the neo4j database to use
634+ Args:
635+ driver (neo4j.Driver): connection to the neo4j database.
636+ additional_properties (bool, default False): see GraphSchema
637+ additional_node_types (bool, default False): see GraphSchema
638+ additional_relationship_types (bool, default False): see GraphSchema
639+ additional_patterns (bool, default False): see GraphSchema
640+ neo4j_database (Optional[str]): name of the neo4j database to use
638641 """
639642
640643 def __init__ (
@@ -672,7 +675,7 @@ def _extract_required_properties(
672675 """
673676 schema_metadata = structured_schema .get ("metadata" , {})
674677 existence_constraint = [] # list of (node label, property name)
675- for constraint in schema_metadata .get ("constraints " , []):
678+ for constraint in schema_metadata .get ("constraint " , []):
676679 if constraint ["type" ] in (
677680 "NODE_PROPERTY_EXISTENCE" ,
678681 "NODE_KEY" ,
@@ -688,10 +691,11 @@ def _extract_required_properties(
688691 existence_constraint .append ((lab , prop ))
689692 return existence_constraint
690693
691- async def run (self ) -> GraphSchema :
694+ async def run (self , * args , ** kwargs ) -> GraphSchema :
692695 structured_schema = get_structured_schema (self .driver , database = self .database )
693696 existence_constraint = self ._extract_required_properties (structured_schema )
694697
698+ # node labels with properties
695699 node_labels = set (structured_schema ["node_props" ].keys ())
696700 node_types = [
697701 {
@@ -708,6 +712,8 @@ async def run(self) -> GraphSchema:
708712 }
709713 for key , properties in structured_schema ["node_props" ].items ()
710714 ]
715+
716+ # relationships with properties
711717 rel_labels = set (structured_schema ["rel_props" ].keys ())
712718 relationship_types = [
713719 {
@@ -723,27 +729,41 @@ async def run(self) -> GraphSchema:
723729 }
724730 for key , properties in structured_schema ["rel_props" ].items ()
725731 ]
732+
726733 patterns = [
727734 (s ["start" ], s ["type" ], s ["end" ])
728735 for s in structured_schema ["relationships" ]
729736 ]
737+
730738 # deal with nodes and relationships without properties
731739 for source , rel , target in patterns :
732740 if source not in node_labels :
741+ if not self .additional_properties :
742+ logger .warning (
743+ f"SCHEMA: found node label { source } without property and additional_properties=False: this node label will always be pruned!"
744+ )
733745 node_labels .add (source )
734746 node_types .append (
735747 {
736748 "label" : source ,
737749 }
738750 )
739751 if target not in node_labels :
752+ if not self .additional_properties :
753+ logger .warning (
754+ f"SCHEMA: found node label { target } without property and additional_properties=False: this node label will always be pruned!"
755+ )
740756 node_labels .add (target )
741757 node_types .append (
742758 {
743759 "label" : target ,
744760 }
745761 )
746762 if rel not in rel_labels :
763+ if not self .additional_properties :
764+ logger .warning (
765+ f"SCHEMA: found relationship type { rel } without property and additional_properties=False: this relationship type will always be pruned!"
766+ )
747767 rel_labels .add (rel )
748768 relationship_types .append (
749769 {
0 commit comments