Skip to content

Commit 09ae3e0

Browse files
Merge pull request #185 from neo4j-labs/DEV
Staging with gemini
2 parents eea624b + d5429e9 commit 09ae3e0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+2264
-986
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,4 +161,5 @@ cython_debug/
161161
.vscode/launch.json
162162
temp.pdf
163163
google-cloud-sdk
164-
google-cloud-cli-469.0.0-linux-x86_64.tar.gz
164+
google-cloud-cli-469.0.0-linux-x86_64.tar.gz
165+
/data/llm-experiments-387609-c73d512ca3b1.json

backend/requirements.txt

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ asyncio==3.4.3
88
attrs==23.2.0
99
backoff==2.2.1
1010
beautifulsoup4==4.12.3
11-
boto3==1.34.66
12-
botocore==1.34.66
11+
boto3
12+
botocore
13+
cachetools==5.3.3
1314
certifi==2024.2.2
1415
cffi==1.16.0
1516
chardet==5.2.0
@@ -23,6 +24,7 @@ dataclasses-json==0.6.4
2324
dataclasses-json-speakeasy==0.5.11
2425
Deprecated==1.2.14
2526
distro==1.9.0
27+
docstring_parser==0.16
2628
effdet==0.4.1
2729
emoji==2.10.1
2830
exceptiongroup==1.2.0
@@ -34,14 +36,29 @@ flatbuffers==23.5.26
3436
fonttools==4.49.0
3537
frozenlist==1.4.1
3638
fsspec==2024.2.0
39+
google-api-core==2.18.0
40+
google-auth==2.29.0
41+
google-cloud-aiplatform==1.45.0
42+
google-cloud-bigquery==3.19.0
43+
google-cloud-core==2.4.1
44+
google-cloud-resource-manager==1.12.3
45+
google-cloud-storage==2.16.0
46+
google-crc32c==1.5.0
47+
google-resumable-media==2.7.0
48+
googleapis-common-protos==1.63.0
3749
greenlet==3.0.3
50+
grpc-google-iam-v1==0.13.0
51+
grpcio==1.62.1
52+
google-ai-generativelanguage
53+
grpcio-status==1.62.1
3854
h11==0.14.0
3955
httpcore==1.0.4
4056
httpx==0.27.0
4157
huggingface-hub==0.20.3
4258
humanfriendly==10.0
4359
idna==3.6
4460
importlib-resources==6.1.1
61+
install==1.3.5
4562
iopath==0.1.10
4663
Jinja2==3.1.3
4764
jmespath==1.0.1
@@ -50,11 +67,13 @@ jsonpatch==1.33
5067
jsonpath-python==1.0.6
5168
jsonpointer==2.4
5269
kiwisolver==1.4.5
53-
langchain==0.1.12
54-
langchain-community==0.0.28
55-
langchain-core==0.1.32
56-
langchain-experimental==0.0.54
57-
langchain-openai==0.0.8
70+
langchain
71+
langchain-google-genai
72+
langchain-community
73+
langchain-core
74+
langchain-experimental
75+
langchain-google-vertexai
76+
langchain-openai
5877
langchain-text-splitters==0.0.1
5978
langdetect==1.0.9
6079
langsmith==0.1.31
@@ -85,7 +104,10 @@ pikepdf==8.11.0
85104
pillow==10.2.0
86105
pillow_heif==0.15.0
87106
portalocker==2.8.2
107+
proto-plus==1.23.0
88108
protobuf==4.23.4
109+
pyasn1==0.6.0
110+
pyasn1_modules==0.4.0
89111
pycocotools==2.0.7
90112
pycparser==2.21
91113
pydantic==2.6.4
@@ -104,10 +126,12 @@ pytz==2024.1
104126
PyYAML==6.0.1
105127
rapidfuzz==3.6.1
106128
regex==2023.12.25
107-
requests==2.31.0
129+
requests
130+
rsa==4.9
108131
s3transfer==0.10.1
109132
safetensors==0.3.2
110133
scipy==1.10.1
134+
shapely==2.0.3
111135
six==1.16.0
112136
sniffio==1.3.1
113137
soupsieve==2.5
@@ -122,14 +146,16 @@ tokenizers==0.15.2
122146
tqdm==4.66.2
123147
transformers==4.37.1
124148
triton==2.2.0
149+
types-protobuf
150+
types-requests
125151
typing-inspect==0.9.0
126152
typing_extensions==4.9.0
127153
tzdata==2024.1
128-
unstructured==0.12.6
129-
unstructured-client==0.18.0
130-
unstructured-inference==0.7.23
131-
unstructured.pytesseract==0.3.12
132-
urllib3==1.26.18
154+
unstructured
155+
unstructured-client
156+
unstructured-inference
157+
unstructured.pytesseract
158+
urllib3
133159
uvicorn==0.29.0
134160
wikipedia==1.4.0
135161
wrapt==1.16.0

backend/score.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
from fastapi import FastAPI, File, UploadFile, Form
2-
import uvicorn
3-
from fastapi import FastAPI, Depends
2+
from fastapi import FastAPI
43
from fastapi_health import health
54
from fastapi.middleware.cors import CORSMiddleware
65
from src.main import *
6+
from src.QA_integration import *
7+
from src.entities.user_credential import user_credential
8+
import uvicorn
79
import asyncio
810
import base64
9-
from src.QA_integration import *
11+
1012

1113

1214
def healthy_condition():
@@ -61,13 +63,13 @@ async def create_source_knowledge_graph_url(
6163
database=Form(None),
6264
aws_access_key_id=Form(None),
6365
aws_secret_access_key=Form(None),
64-
max_limit=Form(5),
65-
query_source=Form(None),
6666
wiki_query=Form(None),
67-
model=Form(None)
67+
model=Form(None),
68+
gcs_bucket_name=Form(None),
69+
gcs_bucket_folder=Form(None)
6870
):
6971
return create_source_node_graph_url(
70-
uri, userName, password, model, source_url, database, wiki_query, aws_access_key_id, aws_secret_access_key
72+
uri, userName, password, model, source_url, database, wiki_query, aws_access_key_id, aws_secret_access_key,gcs_bucket_name, gcs_bucket_folder
7173
)
7274

7375

@@ -84,6 +86,9 @@ async def extract_knowledge_graph_from_file(
8486
aws_secret_access_key=Form(None),
8587
wiki_query=Form(None),
8688
max_sources=Form(None),
89+
gcs_bucket_name=Form(None),
90+
gcs_bucket_folder=Form(None),
91+
gcs_blob_filename=Form(None)
8792
):
8893
"""
8994
Calls 'extract_graph_from_file' in a new thread to create Neo4jGraph from a
@@ -108,10 +113,7 @@ async def extract_knowledge_graph_from_file(
108113
password,
109114
model,
110115
database,
111-
file=file,
112-
source_url=None,
113-
wiki_query=wiki_query,
114-
max_sources=max_sources,
116+
file=file
115117
)
116118
elif source_url:
117119
return await asyncio.to_thread(
@@ -128,7 +130,7 @@ async def extract_knowledge_graph_from_file(
128130
max_sources=max_sources,
129131
)
130132
elif wiki_query:
131-
return await asyncio.to_thread(
133+
return await asyncio.to_thread(
132134
extract_graph_from_file,
133135
uri,
134136
userName,
@@ -137,9 +139,22 @@ async def extract_knowledge_graph_from_file(
137139
database,
138140
wiki_query=wiki_query
139141
)
142+
elif gcs_bucket_name:
143+
return await asyncio.to_thread(
144+
extract_graph_from_file,
145+
uri,
146+
userName,
147+
password,
148+
model,
149+
database,
150+
gcs_bucket_name = gcs_bucket_name,
151+
gcs_bucket_folder = gcs_bucket_folder,
152+
gcs_blob_filename = gcs_blob_filename
153+
)
154+
140155

141156
else:
142-
return {"job_status": "Failure", "error": "No file found"}
157+
return {"status": "Failed", "error": "No file found"}
143158

144159

145160
@app.get("/sources_list")

backend/src/QA_integration.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from langchain_openai import OpenAIEmbeddings
99
import logging
1010
from langchain_community.chat_message_histories import Neo4jChatMessageHistory
11-
import asyncio
1211
load_dotenv()
1312

1413
openai_api_key = os.environ.get('OPENAI_API_KEY')
@@ -30,29 +29,29 @@ def vector_embed_results(qa,question):
3029

3130
return vector_res
3231

33-
def cypher_results(graph,question):
34-
cypher_res={}
35-
try:
36-
graph.refresh_schema()
37-
cypher_chain = GraphCypherQAChain.from_llm(
38-
graph=graph,
39-
cypher_llm=ChatOpenAI(temperature=0, model=model_version),
40-
qa_llm=ChatOpenAI(temperature=0, model=model_version),
41-
validate_cypher=True, # Validate relationship directions
42-
verbose=True,
43-
top_k=2
44-
)
45-
try:
46-
cypher_res=cypher_chain.invoke({"query": question})
47-
except:
48-
cypher_res={}
32+
# def cypher_results(graph,question):
33+
# cypher_res={}
34+
# try:
35+
# graph.refresh_schema()
36+
# cypher_chain = GraphCypherQAChain.from_llm(
37+
# graph=graph,
38+
# cypher_llm=ChatOpenAI(temperature=0, model=model_version),
39+
# qa_llm=ChatOpenAI(temperature=0, model=model_version),
40+
# validate_cypher=True, # Validate relationship directions
41+
# verbose=True,
42+
# top_k=2
43+
# )
44+
# try:
45+
# cypher_res=cypher_chain.invoke({"query": question})
46+
# except:
47+
# cypher_res={}
4948

50-
except Exception as e:
51-
error_message = str(e)
52-
logging.exception(f'Exception in CypherQAChain in QA component:{error_message}')
53-
# raise Exception(error_message)
49+
# except Exception as e:
50+
# error_message = str(e)
51+
# logging.exception(f'Exception in CypherQAChain in QA component:{error_message}')
52+
# # raise Exception(error_message)
5453

55-
return cypher_res
54+
# return cypher_res
5655

5756
def save_chat_history(uri,userName,password,session_id,user_message,ai_message):
5857
try:
@@ -117,17 +116,19 @@ def QA_RAG(uri,userName,password,question,session_id):
117116
llm=llm, chain_type="stuff", retriever=neo_db.as_retriever(search_kwargs={'k': 3,"score_threshold": 0.5}), return_source_documents=True
118117
)
119118

120-
graph = Neo4jGraph(
121-
url=uri,
122-
username=userName,
123-
password=password
124-
)
125119
vector_res=vector_embed_results(qa,question)
126120
print('Response from Vector embeddings')
127121
print(vector_res)
128-
cypher_res= cypher_results(graph,question)
129-
print('Response from CypherQAChain')
130-
print(cypher_res)
122+
123+
# Disable Cypher Chain QA
124+
# graph = Neo4jGraph(
125+
# url=uri,
126+
# username=userName,
127+
# password=password
128+
# )
129+
# cypher_res= cypher_results(graph,question)
130+
# print('Response from CypherQAChain')
131+
# print(cypher_res)
131132

132133
chat_summary=get_chat_history(llm,uri,userName,password,session_id)
133134

@@ -140,10 +141,11 @@ def QA_RAG(uri,userName,password,question,session_id):
140141
Given the user's query: {question}, provide a meaningful and efficient answer based
141142
on the insights derived from the following data:
142143
chat_summary:{chat_summary}
143-
Structured information: {cypher_res.get('result','')}.
144+
Structured information: .
144145
Unstructured information: {vector_res.get('result','')}.
145146
146-
"""
147+
"""
148+
147149
print(final_prompt)
148150
response = llm.predict(final_prompt)
149151
ai_message=response

backend/src/QA_optimization.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def __init__(self, uri, userName, password, question, session_id):
3737
async def execute(self):
3838
tasks = [
3939
self._vector_embed_results(),
40-
self._cypher_results(),
40+
# self._cypher_results(), ## Disabled call for cypher_results
4141
self._get_chat_history()
4242
]
4343
return await asyncio.gather(*tasks)
@@ -114,8 +114,6 @@ async def _cypher_results(self):
114114
print("Cypher QA duration",datetime.now()-t)
115115
return cypher_res
116116

117-
118-
119117
async def _get_chat_history(self):
120118
try:
121119
t=datetime.now()
@@ -195,9 +193,12 @@ async def main(uri,userName,password,question,session_id):
195193
parallel_component = ParallelComponent(uri, userName, password, question, session_id)
196194
f_results=await parallel_component.execute()
197195
print(f_results)
196+
# f_vector_result=f_results[0]['result']
197+
# f_cypher_result=f_results[1].get('result','')
198+
# f_chat_summary=f_results[2]['result']
198199
f_vector_result=f_results[0]['result']
199-
f_cypher_result=f_results[1].get('result','')
200-
f_chat_summary=f_results[2]['result']
200+
f_cypher_result = "" # Passing Empty string for cypher_result
201+
f_chat_summary=f_results[1]['result']
201202
print(f_vector_result)
202203
print(f_cypher_result)
203204
print(f_chat_summary)

backend/src/api_response.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
2+
3+
def create_api_response(
    status,
    success_count=None,
    Failed_count=None,
    data=None,
    error=None,
    message=None,
    file_source=None,
    file_name=None,
):
    """
    Build the dictionary that is returned to API clients.

    Only ``status`` is always present. Every other key is added solely when
    the corresponding argument carries a value, so clients can test for key
    presence instead of checking for ``None``.

    Args:
        status: Value stored under the ``"status"`` key.
        success_count: Number of files successfully processed.
        Failed_count: Number of files that failed to process.
        data: Payload stored under ``"data"``.
        error: Error description stored under ``"error"``.
        message: Free-form text stored under ``"message"``.
        file_source: Stored under ``"file_source"``.
        file_name: Stored under ``"file_name"``.

    Returns:
        A dictionary with ``"status"`` plus one key per supplied argument.
        When ``success_count`` is supplied, ``"Failed_count"`` is always
        emitted next to it (possibly as ``None``) so both counters travel
        together.
    """
    response = {"status": status}

    if data is not None:
        response["data"] = data

    if error is not None:
        response["error"] = error

    if success_count is not None:
        # Keep the pair together: existing clients receive "Failed_count"
        # whenever "success_count" is present, even if it is None.
        response["success_count"] = success_count
        response["Failed_count"] = Failed_count
    elif Failed_count is not None:
        # A failure count supplied on its own used to be silently dropped.
        response["Failed_count"] = Failed_count

    if message is not None:
        response["message"] = message

    if file_source is not None:
        response["file_source"] = file_source

    if file_name is not None:
        response["file_name"] = file_name

    return response

0 commit comments

Comments
 (0)