Commit b685f4c

Merge pull request #128 from neo4j-labs/DEV
Dev To Staging
2 parents ff2ca07 + 3503fb5 commit b685f4c

29 files changed: +2112 −239 lines changed

backend/score.py

Lines changed: 16 additions & 3 deletions
@@ -6,6 +6,7 @@
 from src.main import *
 import asyncio
 import base64
+from src.QA_integration import *


 def healthy_condition():
@@ -144,12 +145,24 @@ async def get_source_list(uri:str,
     return result

 @app.post("/update_similarity_graph")
-async def update_similarity_graph():
+async def update_similarity_graph(uri=Form(None),
+                                  userName=Form(None),
+                                  password=Form(None),
+                                  database=Form(None)):
     """
     Calls 'update_graph' which post the query to update the similiar nodes in the graph
     """

-    result = await asyncio.to_thread(update_graph)
+    result = await asyncio.to_thread(update_graph,uri,userName,password,database)
+    return result
+
+@app.post("/chat_bot")
+async def chat_bot(uri=Form(None),
+                   userName=Form(None),
+                   password=Form(None),
+                   question=Form(None),
+                   model=Form(None)):
+    result = await asyncio.to_thread(QA_RAG,uri=uri,userName=userName,password=password,model_version=model,question=question)
     return result

 def decode_password(pwd):
@@ -158,4 +171,4 @@ def decode_password(pwd):
     return decoded_password

 if __name__ == "__main__":
-    uvicorn.run(app)
+    uvicorn.run(app)
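
The new /chat_bot route takes the Neo4j connection details, the question, and the model name as form fields, hands them to QA_RAG on a worker thread, and returns its result. A minimal sketch of calling the endpoint over HTTP, assuming the FastAPI app is served locally on port 8000 (the host, port, and credential values below are placeholders, not part of the commit):

import requests

# Placeholder deployment and connection values, for illustration only.
BACKEND_URL = "http://localhost:8000"

response = requests.post(
    f"{BACKEND_URL}/chat_bot",
    data={
        "uri": "neo4j+s://<your-instance>.databases.neo4j.io",
        "userName": "neo4j",
        "password": "<password>",
        "question": "What do you know about machine learning?",
        "model": "OpenAI GPT 3.5",
    },
)
# QA_RAG returns {"message": <answer>, "user": "chatbot"}
print(response.json())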

backend/src/QA_integration.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+from langchain_community.vectorstores.neo4j_vector import Neo4jVector
+from langchain.chains import GraphCypherQAChain
+from langchain.graphs import Neo4jGraph
+import os
+from dotenv import load_dotenv
+from langchain.chains import RetrievalQA
+from langchain_openai import ChatOpenAI
+from langchain_openai import OpenAIEmbeddings
+import logging
+load_dotenv()
+
+openai_api_key = os.environ.get('OPENAI_API_KEY')
+
+def vector_embed_results(qa,question):
+    vector_res={}
+    try:
+        # question ="What do you know about machine learning"
+        result = qa({"query": question})
+        vector_res['result']=result["result"]
+        list_source_docs=[]
+        for i in result["source_documents"]:
+            list_source_docs.append(i.metadata['source'])
+        vector_res['source']=list_source_docs
+    except Exception as e:
+        error_message = str(e)
+        logging.exception(f'Exception in vector embedding in QA component:{error_message}')
+        raise Exception(error_message)
+
+    return vector_res
+
+def cypher_results(graph,question,model_version):
+    cypher_res={}
+    try:
+        graph.refresh_schema()
+        cypher_chain = GraphCypherQAChain.from_llm(
+            graph=graph,
+            # cypher_llm=ChatOpenAI(temperature=0, model="gpt-4"),
+            cypher_llm=ChatOpenAI(temperature=0, model=model_version),
+            qa_llm=ChatOpenAI(temperature=0, model=model_version),
+            validate_cypher=True, # Validate relationship directions
+            verbose=True,
+            top_k=2
+        )
+
+        cypher_res=cypher_chain.invoke({"query": question})
+
+    except Exception as e:
+        error_message = str(e)
+        logging.exception(f'Exception in CypherQAChain in QA component:{error_message}')
+        raise Exception(error_message)
+
+    return cypher_res
+
+
+
+def QA_RAG(uri,userName,password,model_version,question):
+    try:
+        if model_version=='OpenAI GPT 3.5':
+            model_version='gpt-3.5-turbo'
+        elif model_version=='OpenAI GPT 4':
+            model_version='gpt-4-0125-preview'
+        retrieval_query="""
+        MATCH (node)-[:PART_OF]->(d:Document)
+        WITH d, apoc.text.join(collect(node.text),"\n----\n") as text, avg(score) as score
+        RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata
+        """
+
+        neo_db=Neo4jVector.from_existing_index(
+            embedding=OpenAIEmbeddings(),
+            url=uri,
+            username=userName,
+            password=password,
+            database="neo4j",
+            index_name="vector",
+            retrieval_query=retrieval_query,
+        )
+        llm = ChatOpenAI(model= model_version, temperature=0)
+
+        qa = RetrievalQA.from_chain_type(
+            llm=llm, chain_type="stuff", retriever=neo_db.as_retriever(search_kwargs={"score_threshold": 0.5}), return_source_documents=True
+        )
+
+        graph = Neo4jGraph(
+            url=uri,
+            username=userName,
+            password=password
+        )
+        vector_res=vector_embed_results(qa,question)
+        print(vector_res)
+        cypher_res= cypher_results(graph,question,model_version)
+        print(cypher_res)
+        final_prompt = f"""You are a helpful question-answering agent. Your task is to analyze
+        and synthesize information from two sources: the top result from a similarity search
+        (unstructured information) and relevant data from a graph database (structured information).
+        Given the user's query: {question}, provide a meaningful and efficient answer based
+        on the insights derived from the following data:
+        Structured information: {cypher_res.get('result','')}.
+        Unstructured information: {vector_res.get('result','')}.
+
+        If structured information fails to find an answer then use the answer from unstructured information and vice versa. I only want a straightforward answer without mentioning from which source you got the answer.
+        """
+        print(final_prompt)
+        response = llm.predict(final_prompt)
+        res={"message":response,"user":"chatbot"}
+        return res
+    except Exception as e:
+        error_message = str(e)
+        logging.exception(f'Exception in in QA component:{error_message}')
+        raise Exception(error_message)
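
QA_RAG maps the UI model labels to OpenAI model ids, answers the question twice — once through a vector RetrievalQA chain over the existing "vector" index and once through a GraphCypherQAChain over the graph schema — and then asks the LLM to merge the two answers into one response. A minimal sketch of calling it directly, assuming OPENAI_API_KEY is set in the environment and using placeholder connection values:

from src.QA_integration import QA_RAG

# Placeholder Neo4j connection details; substitute your own instance and credentials.
answer = QA_RAG(
    uri="neo4j+s://<your-instance>.databases.neo4j.io",
    userName="neo4j",
    password="<password>",
    model_version="OpenAI GPT 4",   # mapped internally to 'gpt-4-0125-preview'
    question="What do you know about machine learning?",
)
print(answer["message"])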

backend/src/diffbot_transformer.py

Lines changed: 13 additions & 4 deletions
@@ -5,6 +5,7 @@
 from typing import List
 import os
 import logging
+import uuid

 logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO')
 def extract_graph_from_diffbot(graph: Neo4jGraph,
@@ -31,16 +32,24 @@ def extract_graph_from_diffbot(graph: Neo4jGraph,
     graph_document_list = []

     logging.info(f"create relationship between source,chunk and entity nodes created from Diffbot")
+    current_chunk_id = ''
+    relationship_cypher_list = []
     for i,chunk in enumerate(chunks):
+        previous_chunk_id = current_chunk_id
+        current_chunk_id = str(uuid.uuid1())
+        position = i+1
         if i == 0:
             firstChunk = True
         else:
             firstChunk = False
+        metadata = {"position": position,"length": len(chunk.page_content)}
+        chunk = Document(page_content=chunk.page_content,metadata = metadata)
         graph_document = diffbot_nlp.convert_to_graph_documents([chunk])
         graph.add_graph_documents(graph_document)
-        create_source_chunk_entity_relationship(file_name,graph,graph_document,chunk,uri,userName,password,firstChunk)
-        graph_document_list.append(graph_document[0])
+        lst_cypher_queries_chunk_relationship = create_source_chunk_entity_relationship(file_name,graph,graph_document,chunk,uri,userName,password,firstChunk,current_chunk_id,
+                                                                                        previous_chunk_id)
+        graph_document_list.append(graph_document[0])
+        relationship_cypher_list.extend(lst_cypher_queries_chunk_relationship)

     graph.refresh_schema()
-    return graph_document_list
-
+    return graph_document_list, relationship_cypher_list
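
The loop now threads a rolling pair of chunk ids through each iteration so that create_source_chunk_entity_relationship receives both the current chunk's id and the id of the chunk before it, along with position and length metadata. A standalone sketch of that bookkeeping pattern, using plain strings in place of real Document chunks:

import uuid

# Illustrative stand-in for the chunks produced upstream; not the real splitter output.
chunks = ["first chunk of text", "second chunk of text", "third chunk of text"]

current_chunk_id = ''
chunk_records = []
for i, text in enumerate(chunks):
    previous_chunk_id = current_chunk_id        # '' for the first chunk, then the prior id
    current_chunk_id = str(uuid.uuid1())        # time-based id, as in the diff
    metadata = {"position": i + 1, "length": len(text)}
    chunk_records.append((previous_chunk_id, current_chunk_id, metadata))

for prev_id, curr_id, meta in chunk_records:
    print(prev_id or "<none>", "->", curr_id, meta)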
