2525import concurrent .futures
2626from concurrent .futures import ThreadPoolExecutor
2727import threading
28+ import uuid
2829
2930load_dotenv ()
3031logging .basicConfig (format = '%(asctime)s - %(message)s' ,level = 'INFO' )
@@ -190,6 +191,13 @@ def extract_and_store_graph(
190191 model_version ,
191192 graph : Neo4jGraph ,
192193 document : Document ,
194+ file_name : str ,
195+ uri : str ,
196+ userName :str ,
197+ password :str ,
198+ firstChunk :bool ,
199+ current_chunk_id :uuid ,
200+ previous_chunk_id :uuid ,
193201 nodes :Optional [List [str ]] = None ,
194202 rels :Optional [List [str ]]= None ) -> None :
195203
@@ -198,10 +206,18 @@ def extract_and_store_graph(
198206 store the result into a Neo4jGraph.
199207
200208 Args:
201- graph: Neo4j graph to store the data into
202- document: Langchain document to extract data from
203- nodes: List of nodes to extract ( default : None )
204- rels: List of relationships to extract ( default : None )
209+ model_version: LLM model version
210+ graph: Neo4j graph to store the data into
211+ document: Langchain document to extract data from
212+ file_name (str): file name of input source
213+ uri: URI of the graph to extract
214+ userName: Username to use for graph creation ( if None will use username from config file )
215+ password: Password to use for graph creation ( if None will use password from config file )
216+ firstChunk : Boolean flag, True only for the first chunk of the document; used to create the FIRST_CHUNK and NEXT_CHUNK relationships between chunk and document nodes.
217+ current_chunk_id : Unique id of chunk
218+ previous_chunk_id : Unique id of previous chunk
219+ nodes: List of nodes to extract ( default : None )
220+ rels: List of relationships to extract ( default : None )
205221
206222 Returns:
207223 The GraphDocument that was extracted and stored into the Neo4jgraph
@@ -221,7 +237,9 @@ def extract_and_store_graph(
221237 )]
222238
223239 graph .add_graph_documents (graph_document )
224- return graph_document
240+ lst_cypher_queries_chunk_relationship = create_source_chunk_entity_relationship (file_name ,graph ,graph_document ,document ,uri ,userName ,password ,firstChunk ,current_chunk_id ,
241+ previous_chunk_id )
242+ return graph_document , lst_cypher_queries_chunk_relationship
225243
226244
227245def extract_graph_from_OpenAI (model_version ,
@@ -248,19 +266,28 @@ def extract_graph_from_OpenAI(model_version,
248266 """
249267 openai_api_key = os .environ .get ('OPENAI_API_KEY' )
250268 graph_document_list = []
269+ relationship_cypher_list = []
251270 futures = []
252271 logging .info (f"create relationship between source,chunk and entity nodes created from { model_version } " )
253272
254273 with ThreadPoolExecutor (max_workers = 10 ) as executor :
274+ current_chunk_id = ''
255275 for i , chunk_document in tqdm (enumerate (chunks ), total = len (chunks )):
276+ previous_chunk_id = current_chunk_id
277+ current_chunk_id = str (uuid .uuid1 ())
278+ position = i + 1
256279 if i == 0 :
257280 firstChunk = True
258281 else :
259282 firstChunk = False
260- futures .append (executor .submit (extract_and_store_graph ,model_version ,graph ,chunk_document ))
283+ metadata = {"position" : position ,"length" : len (chunk_document .page_content )}
284+ chunk_document = Document (page_content = chunk_document .page_content ,metadata = metadata )
285+
286+ futures .append (executor .submit (extract_and_store_graph ,model_version ,graph ,chunk_document ,file_name ,uri ,userName ,password ,firstChunk ,current_chunk_id ,previous_chunk_id ))
261287 for future in concurrent .futures .as_completed (futures ):
262- graph_document = future .result ()
263- create_source_chunk_entity_relationship ( file_name , graph , graph_document , chunk_document , uri , userName , password , firstChunk )
288+ graph_document , lst_cypher_queries_chunk_relationship = future .result ()
289+
264290 graph_document_list .append (graph_document [0 ])
291+ relationship_cypher_list .extend (lst_cypher_queries_chunk_relationship )
265292
266- return graph_document_list
293+ return graph_document_list , relationship_cypher_list
0 commit comments