Commit b685f4c

Merge pull request #128 from neo4j-labs/DEV
Dev To Staging
2 parents ff2ca07 + 3503fb5 commit b685f4c

29 files changed: +2112 −239 lines changed

backend/score.py

Lines changed: 16 additions & 3 deletions
@@ -6,6 +6,7 @@
 from src.main import *
 import asyncio
 import base64
+from src.QA_integration import *


 def healthy_condition():
@@ -144,12 +145,24 @@ async def get_source_list(uri:str,
     return result

 @app.post("/update_similarity_graph")
-async def update_similarity_graph():
+async def update_similarity_graph(uri=Form(None),
+                                  userName=Form(None),
+                                  password=Form(None),
+                                  database=Form(None)):
     """
     Calls 'update_graph' which post the query to update the similiar nodes in the graph
     """

-    result = await asyncio.to_thread(update_graph)
+    result = await asyncio.to_thread(update_graph,uri,userName,password,database)
+    return result
+
+@app.post("/chat_bot")
+async def chat_bot(uri=Form(None),
+                   userName=Form(None),
+                   password=Form(None),
+                   question=Form(None),
+                   model=Form(None)):
+    result = await asyncio.to_thread(QA_RAG,uri=uri,userName=userName,password=password,model_version=model,question=question)
     return result

 def decode_password(pwd):
@@ -158,4 +171,4 @@ def decode_password(pwd):
     return decoded_password

 if __name__ == "__main__":
-    uvicorn.run(app)
+    uvicorn.run(app)
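
The new /chat_bot route takes the Neo4j connection details, the question, and the model name as form fields, hands them to QA_RAG on a worker thread, and returns its result. A minimal sketch of calling the endpoint over HTTP, assuming the FastAPI app is served locally on port 8000 (the host, port, and credential values below are placeholders, not part of the commit):

import requests

# Placeholder deployment and connection values, for illustration only.
BACKEND_URL = "http://localhost:8000"

response = requests.post(
    f"{BACKEND_URL}/chat_bot",
    data={
        "uri": "neo4j+s://<your-instance>.databases.neo4j.io",
        "userName": "neo4j",
        "password": "<password>",
        "question": "What do you know about machine learning?",
        "model": "OpenAI GPT 3.5",
    },
)
# QA_RAG returns {"message": <answer>, "user": "chatbot"}
print(response.json())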

backend/src/QA_integration.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+from langchain_community.vectorstores.neo4j_vector import Neo4jVector
+from langchain.chains import GraphCypherQAChain
+from langchain.graphs import Neo4jGraph
+import os
+from dotenv import load_dotenv
+from langchain.chains import RetrievalQA
+from langchain_openai import ChatOpenAI
+from langchain_openai import OpenAIEmbeddings
+import logging
+load_dotenv()
+
+openai_api_key = os.environ.get('OPENAI_API_KEY')
+
+def vector_embed_results(qa,question):
+    vector_res={}
+    try:
+        # question ="What do you know about machine learning"
+        result = qa({"query": question})
+        vector_res['result']=result["result"]
+        list_source_docs=[]
+        for i in result["source_documents"]:
+            list_source_docs.append(i.metadata['source'])
+        vector_res['source']=list_source_docs
+    except Exception as e:
+        error_message = str(e)
+        logging.exception(f'Exception in vector embedding in QA component:{error_message}')
+        raise Exception(error_message)
+
+    return vector_res
+
+def cypher_results(graph,question,model_version):
+    cypher_res={}
+    try:
+        graph.refresh_schema()
+        cypher_chain = GraphCypherQAChain.from_llm(
+            graph=graph,
+            # cypher_llm=ChatOpenAI(temperature=0, model="gpt-4"),
+            cypher_llm=ChatOpenAI(temperature=0, model=model_version),
+            qa_llm=ChatOpenAI(temperature=0, model=model_version),
+            validate_cypher=True, # Validate relationship directions
+            verbose=True,
+            top_k=2
+        )
+
+        cypher_res=cypher_chain.invoke({"query": question})
+
+    except Exception as e:
+        error_message = str(e)
+        logging.exception(f'Exception in CypherQAChain in QA component:{error_message}')
+        raise Exception(error_message)
+
+    return cypher_res
+
+
+
+def QA_RAG(uri,userName,password,model_version,question):
+    try:
+        if model_version=='OpenAI GPT 3.5':
+            model_version='gpt-3.5-turbo'
+        elif model_version=='OpenAI GPT 4':
+            model_version='gpt-4-0125-preview'
+        retrieval_query="""
+        MATCH (node)-[:PART_OF]->(d:Document)
+        WITH d, apoc.text.join(collect(node.text),"\n----\n") as text, avg(score) as score
+        RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata
+        """
+
+        neo_db=Neo4jVector.from_existing_index(
+            embedding=OpenAIEmbeddings(),
+            url=uri,
+            username=userName,
+            password=password,
+            database="neo4j",
+            index_name="vector",
+            retrieval_query=retrieval_query,
+        )
+        llm = ChatOpenAI(model= model_version, temperature=0)
+
+        qa = RetrievalQA.from_chain_type(
+            llm=llm, chain_type="stuff", retriever=neo_db.as_retriever(search_kwargs={"score_threshold": 0.5}), return_source_documents=True
+        )
+
+        graph = Neo4jGraph(
+            url=uri,
+            username=userName,
+            password=password
+        )
+        vector_res=vector_embed_results(qa,question)
+        print(vector_res)
+        cypher_res= cypher_results(graph,question,model_version)
+        print(cypher_res)
+        final_prompt = f"""You are a helpful question-answering agent. Your task is to analyze
+        and synthesize information from two sources: the top result from a similarity search
+        (unstructured information) and relevant data from a graph database (structured information).
+        Given the user's query: {question}, provide a meaningful and efficient answer based
+        on the insights derived from the following data:
+        Structured information: {cypher_res.get('result','')}.
+        Unstructured information: {vector_res.get('result','')}.
+
+        If structured information fails to find an answer then use the answer from unstructured information and vice versa. I only want a straightforward answer without mentioning from which source you got the answer.
+        """
+        print(final_prompt)
+        response = llm.predict(final_prompt)
+        res={"message":response,"user":"chatbot"}
+        return res
+    except Exception as e:
+        error_message = str(e)
+        logging.exception(f'Exception in in QA component:{error_message}')
+        raise Exception(error_message)
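
QA_RAG maps the UI model labels to OpenAI model ids, answers the question twice — once through a vector RetrievalQA chain over the existing "vector" index and once through a GraphCypherQAChain over the graph schema — and then asks the LLM to merge the two answers into one response. A minimal sketch of calling it directly, assuming OPENAI_API_KEY is set in the environment and using placeholder connection values:

from src.QA_integration import QA_RAG

# Placeholder Neo4j connection details; substitute your own instance and credentials.
answer = QA_RAG(
    uri="neo4j+s://<your-instance>.databases.neo4j.io",
    userName="neo4j",
    password="<password>",
    model_version="OpenAI GPT 4",   # mapped internally to 'gpt-4-0125-preview'
    question="What do you know about machine learning?",
)
print(answer["message"])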

backend/src/diffbot_transformer.py

Lines changed: 13 additions & 4 deletions
@@ -5,6 +5,7 @@
 from typing import List
 import os
 import logging
+import uuid

 logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO')
 def extract_graph_from_diffbot(graph: Neo4jGraph,
@@ -31,16 +32,24 @@ def extract_graph_from_diffbot(graph: Neo4jGraph,
     graph_document_list = []

     logging.info(f"create relationship between source,chunk and entity nodes created from Diffbot")
+    current_chunk_id = ''
+    relationship_cypher_list = []
     for i,chunk in enumerate(chunks):
+        previous_chunk_id = current_chunk_id
+        current_chunk_id = str(uuid.uuid1())
+        position = i+1
         if i == 0:
             firstChunk = True
         else:
             firstChunk = False
+        metadata = {"position": position,"length": len(chunk.page_content)}
+        chunk = Document(page_content=chunk.page_content,metadata = metadata)
         graph_document = diffbot_nlp.convert_to_graph_documents([chunk])
         graph.add_graph_documents(graph_document)
-        create_source_chunk_entity_relationship(file_name,graph,graph_document,chunk,uri,userName,password,firstChunk)
-        graph_document_list.append(graph_document[0])
+        lst_cypher_queries_chunk_relationship = create_source_chunk_entity_relationship(file_name,graph,graph_document,chunk,uri,userName,password,firstChunk,current_chunk_id,
+                                                                                        previous_chunk_id)
+        graph_document_list.append(graph_document[0])
+        relationship_cypher_list.extend(lst_cypher_queries_chunk_relationship)

     graph.refresh_schema()
-    return graph_document_list
-
+    return graph_document_list, relationship_cypher_list
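
The loop now threads a rolling pair of chunk ids through each iteration so that create_source_chunk_entity_relationship receives both the current chunk's id and the id of the chunk before it, along with position and length metadata. A standalone sketch of that bookkeeping pattern, using plain strings in place of real Document chunks:

import uuid

# Illustrative stand-in for the chunks produced upstream; not the real splitter output.
chunks = ["first chunk of text", "second chunk of text", "third chunk of text"]

current_chunk_id = ''
chunk_records = []
for i, text in enumerate(chunks):
    previous_chunk_id = current_chunk_id        # '' for the first chunk, then the prior id
    current_chunk_id = str(uuid.uuid1())        # time-based id, as in the diff
    metadata = {"position": i + 1, "length": len(text)}
    chunk_records.append((previous_chunk_id, current_chunk_id, metadata))

for prev_id, curr_id, meta in chunk_records:
    print(prev_id or "<none>", "->", curr_id, meta)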
