@@ -6,9 +6,6 @@ class Neo4jDB:
66 def __init__ (self , uri , user , password , database = None ):
77 self .driver = GraphDatabase .driver (uri , auth = (user , password ))
88 self .database = database
9- self .schema = self .generate_schema ()
10- self .export_schema ()
11- # print(self.driver.get_server_info())
129
1310 def close (self ):
1411 self .driver .close ()
@@ -37,14 +34,14 @@ def get_graph_schema(self, graph):
3734 }
3835
3936 def generate_schema (self ):
40- return {
37+ self . schema = {
4138 "node_properties" : self .get_node_properties (),
4239 "relationship_properties" : self .get_relationship_properties (),
4340 "schema" : self .get_relationship_schema (),
4441 }
4542
46- def export_schema (self ):
47- with open ("neo4j-schema.json" , "w" ) as file :
43+ def export_schema (self , filename ):
44+ with open (filename , "w" ) as file :
4845 dump (self .schema , file )
4946
5047 def validate_node_properties (self , node_props ):
@@ -58,7 +55,7 @@ def validate_node_properties(self, node_props):
5855 exists = self .query (cypher_query )["node_exists" ][0 ]
5956 if not exists :
6057 continue
61- node_properties = []
58+ node_properties = set ()
6259 for prop in node ["properties" ]:
6360 cypher_query = f"""
6461 MATCH (n:{ node ["labels" ]} )
@@ -67,8 +64,8 @@ def validate_node_properties(self, node_props):
6764 """
6865 exists = self .query (cypher_query )["node_property_exists" ][0 ]
6966 if exists :
70- node_properties .append (prop )
71- res [node ["labels" ]] = node_properties
67+ node_properties .add (prop )
68+ res [node ["labels" ]] = list ( node_properties )
7269 return res
7370
7471 def validate_relationship_properties (self , rel_props ):
@@ -82,7 +79,7 @@ def validate_relationship_properties(self, rel_props):
8279 exists = self .query (cypher_query )["relationship_exists" ][0 ]
8380 if not exists :
8481 continue
85- rel_properties = []
82+ rel_properties = set ()
8683 for prop in rel ["properties" ]:
8784 cypher_query = f"""
8885 MATCH ()-[r:{ rel ["type" ]} ]-()
@@ -91,27 +88,27 @@ def validate_relationship_properties(self, rel_props):
9188 """
9289 exists = self .query (cypher_query )["relationship_property_exists" ][0 ]
9390 if exists :
94- rel_properties .append (prop )
95- res [rel ["type" ]] = rel_properties
91+ rel_properties .add (prop )
92+ res [rel ["type" ]] = list ( rel_properties )
9693 return res
9794
9895 def validate_relationship_schema (self , rels ):
9996 res = {}
10097 for rel in rels :
101- rel_targets = []
98+ rel_targets = set ()
10299 for target in rel ["target" ]:
103100 cypher_query = f"""
104- MATCH (n1:{ rel ["source" ]} )-[r:{ rel ["relationship" ]} ]-> (n2:{ target } )
101+ MATCH (n1:{ rel ["source" ]} )-[r:{ rel ["relationship" ]} ]-(n2:{ target } )
105102 WHERE n1 IS NOT NULL AND r IS NOT NULL AND n2 IS NOT NULL
106103 RETURN (COUNT(n1) > 0 AND COUNT(r) > 0 AND COUNT(n2) > 0) AS relationship_schema_exists
107104 """
108105 exists = self .query (cypher_query )["relationship_schema_exists" ][0 ]
109106 if exists :
110- rel_targets .append (target )
107+ rel_targets .add (target )
111108 if len (rel_targets ):
112109 if rel ["source" ] not in res :
113110 res [rel ["source" ]] = {}
114- res [rel ["source" ]][rel ["relationship" ]] = rel_targets
111+ res [rel ["source" ]][rel ["relationship" ]] = list ( rel_targets )
115112 return res
116113
117114 def query (self , cypher_query , transformation = "dataframe" ):
@@ -161,6 +158,7 @@ def get_relationship_schema(self):
161158 d = OrderedDict ()
162159 for i in output :
163160 d .setdefault ((i ["source" ], i ["relationship" ]), set ()).add (i ["target" ])
161+ d .setdefault ((i ["target" ], i ["relationship" ]), set ()).add (i ["source" ])
164162 output = [{"source" : k [0 ], "relationship" : k [1 ], "target" : v .pop () if len (v ) == 1 else v } for k , v in d .items ()]
165163 for i in output :
166164 if type (i ["target" ]) is set :
@@ -177,33 +175,5 @@ def get_relationship_schema(self):
177175 DATABASE = "neo4j"
178176
179177 db = Neo4jDB (URI , USER , PASSWORD , DATABASE )
180-
181- # TODO:
182- # - update the deprecated function
183- # - remove duplicates from schema
181- # - add undirected schema version
185- #
186- # sample code:
187- # import json
188- #
189- # schema = json.load(open("/home/dimitrios/dimitrios/neo4j-schema.json", "r"))
190- #
191- # undericted_schema = {}
192- # for node_A in schema["schema"]:
193- # if node_A not in undericted_schema:
194- # undericted_schema[node_A] = {}
195- # for relationship in schema["schema"][node_A]:
196- # if relationship not in undericted_schema[node_A]:
197- # undericted_schema[node_A][relationship] = []
198- # for node_B in schema["schema"][node_A][relationship]:
199- # if node_B not in undericted_schema[node_A][relationship]:
200- # undericted_schema[node_A][relationship].append(node_B)
201- # if node_B not in undericted_schema:
202- # undericted_schema[node_B] = {}
203- # if relationship not in undericted_schema[node_B]:
204- # undericted_schema[node_B][relationship] = []
205- # if node_A not in undericted_schema[node_B][relationship]:
206- # undericted_schema[node_B][relationship].append(node_A)
207- #
208- # schema["schema"] = undericted_schema
209- # json.dump(schema, open("/home/dimitrios/dimitrios/neo4j-schema-fix.json", "w"))
178+ db .generate_schema ()
179+ db .export_schema ("neo4j-schema.json" )
0 commit comments