Skip to content

Commit 15a3876

Browse files
Merge pull request #152 from neo4j-labs/DEV
DEV- to - STAGING
2 parents 4ac12aa + 3179a6d commit 15a3876

File tree

17 files changed

+413
-202
lines changed

17 files changed

+413
-202
lines changed

backend/score.py

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,16 +57,17 @@ async def create_source_knowledge_graph_url(
5757
uri=Form(None),
5858
userName=Form(None),
5959
password=Form(None),
60-
source_url=Form(),
60+
source_url=Form(None),
6161
database=Form(None),
6262
aws_access_key_id=Form(None),
6363
aws_secret_access_key=Form(None),
6464
max_limit=Form(5),
6565
query_source=Form(None),
66+
wiki_query=Form(None),
6667
model=Form(None)
6768
):
6869
return create_source_node_graph_url(
69-
uri, userName, password, source_url, model, database, aws_access_key_id, aws_secret_access_key
70+
uri, userName, password, model, source_url, database, wiki_query, aws_access_key_id, aws_secret_access_key
7071
)
7172

7273

@@ -126,6 +127,17 @@ async def extract_knowledge_graph_from_file(
126127
wiki_query=wiki_query,
127128
max_sources=max_sources,
128129
)
130+
elif wiki_query:
131+
return await asyncio.to_thread(
132+
extract_graph_from_file,
133+
uri,
134+
userName,
135+
password,
136+
model,
137+
database,
138+
wiki_query=wiki_query
139+
)
140+
129141
else:
130142
return {"job_status": "Failure", "error": "No file found"}
131143

@@ -165,6 +177,14 @@ async def chat_bot(uri=Form(None),
165177
result = await asyncio.to_thread(QA_RAG,uri=uri,userName=userName,password=password,model_version=model,question=question)
166178
return result
167179

180+
@app.post("/connect")
181+
async def connect(uri=Form(None),
182+
userName=Form(None),
183+
password=Form(None),
184+
database=Form(None)):
185+
result = await asyncio.to_thread(connection_check,uri,userName,password,database)
186+
return result
187+
168188
def decode_password(pwd):
169189
sample_string_bytes = base64.b64decode(pwd)
170190
decoded_password = sample_string_bytes.decode("utf-8")

backend/src/main.py

Lines changed: 120 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def check_url_source(url):
160160
logging.error(f"Error in recognize URL: {e}")
161161
raise Exception(e)
162162

163-
def create_source_node_graph_url(uri, userName, password, source_url ,model, db_name=None,aws_access_key_id=None,aws_secret_access_key=None):
163+
def create_source_node_graph_url(uri, userName, password ,model, source_url=None, db_name=None,wiki_query:List[str]=None,aws_access_key_id=None,aws_secret_access_key=None):
164164
"""
165165
Creates a source node in Neo4jGraph and sets properties.
166166
@@ -176,60 +176,87 @@ def create_source_node_graph_url(uri, userName, password, source_url ,model, db_
176176
Success or Failed message of node creation
177177
"""
178178
try:
179-
source_type,youtube_url = check_url_source(source_url)
180179
graph = Neo4jGraph(url=uri, database=db_name, username=userName, password=password)
181-
logging.info(f"source type URL:{source_type}")
182-
if source_type == "s3 bucket":
183-
lst_s3_file_name = []
184-
files_info = get_s3_files_info(source_url,aws_access_key_id=aws_access_key_id,aws_secret_access_key=aws_secret_access_key)
185-
if isinstance(files_info,dict):
186-
return files_info
187-
elif len(files_info)==0:
188-
return create_api_response('Failed',success_count=0,Failed_count=0,message='No pdf files found.')
189-
logging.info(f'files info : {files_info}')
190-
err_flag=0
191-
success_count=0
192-
Failed_count=0
193-
file_type='pdf'
194-
for file_info in files_info:
195-
job_status = "New"
196-
file_name=file_info['file_key']
197-
file_size=file_info['file_size_bytes']
198-
s3_file_path=str(source_url+file_name)
199-
try:
200-
create_source_node(graph,file_name.split('/')[-1],file_size,file_type,source_type,model,s3_file_path,aws_access_key_id)
201-
success_count+=1
202-
lst_s3_file_name.append({'fileName':file_name.split('/')[-1],'fileSize':file_size,'url':s3_file_path})
203-
204-
except Exception as e:
205-
err_flag=1
206-
Failed_count+=1
207-
error_message = str(e)
208-
if err_flag==1:
209-
job_status = "Failed"
210-
message="Unable to create source node for s3 bucket files"
211-
return create_api_response(job_status,message=message,error=error_message,success_count=success_count,Failed_count=Failed_count,file_source='s3 bucket')
212-
return create_api_response("Success",message="Source Node created successfully",success_count=success_count,Failed_count=Failed_count,file_source='s3 bucket',file_name=lst_s3_file_name)
213-
elif source_type == 'youtube':
214-
source_url= youtube_url
215-
match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',source_url)
216-
logging.info(f"match value{match}")
217-
file_name = YouTube(source_url).title
218-
transcript= get_youtube_transcript(match.group(1))
219-
if transcript==None or len(transcript)==0:
220-
file_size=''
221-
job_status = "Failed"
222-
message = f"Youtube transcript is not available for : {file_name}"
223-
error_message = str(e)
224-
logging.exception(f'Exception Stack trace:')
225-
return create_api_response(job_status,message=message,error=error_message,file_source=source_type)
226-
else:
227-
file_size=sys.getsizeof(transcript)
228-
file_type='text'
229-
aws_access_key_id=''
230-
job_status = "Completed"
231-
create_source_node(graph,file_name,file_size,file_type,source_type,model,source_url,aws_access_key_id)
232-
return create_api_response(job_status,file_name={'fileName':file_name,'fileSize':file_size,'url':source_url})
180+
if source_url:
181+
source_type,youtube_url = check_url_source(source_url)
182+
logging.info(f"source type URL:{source_type}")
183+
if source_type == "s3 bucket":
184+
lst_s3_file_name = []
185+
files_info = get_s3_files_info(source_url,aws_access_key_id=aws_access_key_id,aws_secret_access_key=aws_secret_access_key)
186+
if isinstance(files_info,dict):
187+
return files_info
188+
elif len(files_info)==0:
189+
return create_api_response('Failed',success_count=0,Failed_count=0,message='No pdf files found.')
190+
logging.info(f'files info : {files_info}')
191+
err_flag=0
192+
success_count=0
193+
Failed_count=0
194+
file_type='pdf'
195+
for file_info in files_info:
196+
job_status = "New"
197+
file_name=file_info['file_key']
198+
file_size=file_info['file_size_bytes']
199+
s3_file_path=str(source_url+file_name)
200+
try:
201+
create_source_node(graph,file_name.split('/')[-1],file_size,file_type,source_type,model,s3_file_path,aws_access_key_id)
202+
success_count+=1
203+
lst_s3_file_name.append({'fileName':file_name.split('/')[-1],'fileSize':file_size,'url':s3_file_path})
204+
205+
except Exception as e:
206+
err_flag=1
207+
Failed_count+=1
208+
error_message = str(e)
209+
if err_flag==1:
210+
job_status = "Failed"
211+
message="Unable to create source node for s3 bucket files"
212+
return create_api_response(job_status,message=message,error=error_message,success_count=success_count,Failed_count=Failed_count,file_source='s3 bucket')
213+
return create_api_response("Success",message="Source Node created successfully",success_count=success_count,Failed_count=Failed_count,file_source='s3 bucket',file_name=lst_s3_file_name)
214+
elif source_type == 'youtube':
215+
source_url= youtube_url
216+
match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',source_url)
217+
logging.info(f"match value{match}")
218+
file_name = YouTube(source_url).title
219+
transcript= get_youtube_transcript(match.group(1))
220+
if transcript==None or len(transcript)==0:
221+
file_size=''
222+
job_status = "Failed"
223+
message = f"Youtube transcript is not available for : {file_name}"
224+
error_message = str(e)
225+
logging.exception(f'Exception Stack trace:')
226+
return create_api_response(job_status,message=message,error=error_message,file_source=source_type)
227+
else:
228+
file_size=sys.getsizeof(transcript)
229+
file_type='text'
230+
aws_access_key_id=''
231+
job_status = "Completed"
232+
create_source_node(graph,file_name,file_size,file_type,source_type,model,source_url,aws_access_key_id)
233+
return create_api_response(job_status,file_name={'fileName':file_name,'fileSize':file_size,'url':source_url})
234+
235+
elif wiki_query:
236+
success_count=0
237+
Failed_count=0
238+
lst_file_metadata=[]
239+
queries = wiki_query.split(',')
240+
for query in queries:
241+
logging.info(f"Creating source node for {query.strip()}")
242+
pages = WikipediaLoader(query=query.strip(), load_max_docs=1, load_all_available_meta=True).load()
243+
file_name = query.strip()
244+
file_size = sys.getsizeof(pages[0].page_content)
245+
file_type = 'text'
246+
source_url= pages[0].metadata['source']
247+
aws_access_key_id=''
248+
source_type = 'Wikipedia'
249+
job_status = 'Completed'
250+
try:
251+
create_source_node(graph,file_name,file_size,file_type,source_type,model,source_url,aws_access_key_id)
252+
success_count+=1
253+
lst_file_metadata.append({'fileName':file_name,'fileSize':file_size,'url':source_url})
254+
except Exception as e:
255+
job_status = "Failed"
256+
Failed_count+=1
257+
error_message = str(e)
258+
return create_api_response(job_status,message="SUnable to create source node for Wikipedia source",file_name=lst_file_metadata, success_count=success_count, Failed_count=Failed_count)
259+
return create_api_response(job_status,message="Source Node created successfully",file_name=lst_file_metadata, success_count=success_count, Failed_count=Failed_count)
233260
else:
234261
job_status = "Failed"
235262
return create_api_response(job_status,message='Invalid URL')
@@ -281,25 +308,6 @@ def get_s3_pdf_content(s3_url,aws_access_key_id=None,aws_secret_access_key=None)
281308
logging.error(f"getting error while reading content from s3 files:{e}")
282309
raise Exception(e)
283310

284-
def get_wikipedia_content(wiki_query,max_sources):
285-
try:
286-
searches=wiki_query.split(',')
287-
if max_sources:
288-
searches=searches[:int(max_sources)]
289-
else:
290-
searches=searches[:2]
291-
pages=[]
292-
for query in searches:
293-
wiki_pages = WikipediaLoader(query=query.strip(), load_max_docs=1, load_all_available_meta=False).load()
294-
pages.extend(wiki_pages)
295-
296-
logging.info(f"Total Pages from Wikipedia = {len(pages)}")
297-
return pages
298-
except Exception as e:
299-
logging.error(f"Not finding wiki content:{e}")
300-
raise Exception(e)
301-
302-
303311

304312
def extract_graph_from_file(uri, userName, password, model, db_name=None, file=None,source_url=None,aws_access_key_id=None,aws_secret_access_key=None,wiki_query=None,max_sources=None):
305313
"""
@@ -329,6 +337,9 @@ def extract_graph_from_file(uri, userName, password, model, db_name=None, file=N
329337
if file!=None:
330338
file_name, file_key, pages = get_documents_from_file(file)
331339

340+
elif wiki_query:
341+
file_name, file_key, pages = get_documents_from_Wikipedia(wiki_query)
342+
332343
elif source_type =='s3 bucket':
333344
if(aws_access_key_id==None or aws_secret_access_key==None):
334345
job_status = "Failed"
@@ -339,9 +350,6 @@ def extract_graph_from_file(uri, userName, password, model, db_name=None, file=N
339350
logging.info(f"filename {file_name} file_key: {file_key} pages:{pages} ")
340351
elif source_type =='youtube':
341352
file_name, file_key, pages = get_documents_from_youtube(source_url)
342-
if wiki_query is not None:
343-
logging.info(f"Wikipedia query source = {wiki_query}")
344-
pages.extend(get_wikipedia_content(wiki_query, max_sources))
345353

346354
else:
347355
job_status = "Failed"
@@ -483,6 +491,21 @@ def get_documents_from_youtube(url):
483491
logging.exception(f'Exception in reading transcript from youtube:{error_message}')
484492
raise Exception(error_message)
485493

494+
def get_documents_from_Wikipedia(wiki_query:str):
495+
try:
496+
pages = WikipediaLoader(query=wiki_query.strip(), load_max_docs=1, load_all_available_meta=False).load()
497+
file_name = wiki_query.strip()
498+
file_key = wiki_query.strip()
499+
logging.info(f"Total Pages from Wikipedia = {len(pages)}")
500+
return file_name, file_key, pages
501+
except Exception as e:
502+
job_status = "Failed"
503+
message="Failed To Process Wikipedia Query"
504+
error_message = str(e)
505+
logging.error(f"Failed To Process Wikipedia Query: {file_name}")
506+
logging.exception(f'Exception Stack trace: {error_message}')
507+
return create_api_response(job_status,message=message,error=error_message,file_name=file_name)
508+
486509
def get_source_list_from_graph(uri,userName,password,db_name=None):
487510
"""
488511
Args:
@@ -529,6 +552,27 @@ def update_graph(uri,userName,password,db_name):
529552
error_message = str(e)
530553
logging.exception(f'Exception in update KNN graph:{error_message}')
531554
raise Exception(error_message)
555+
556+
def connection_check(uri,userName,password,db_name):
557+
"""
558+
Args:
559+
uri: URI of the graph to extract
560+
userName: Username to use for graph creation ( if None will use username from config file )
561+
password: Password to use for graph creation ( if None will use password from config file )
562+
db_name: db_name is database name to connect to graph db
563+
Returns:
564+
Returns a status of connection from NEO4j is success or failure
565+
"""
566+
try:
567+
graph = Neo4jGraph(url=uri, database=db_name, username=userName, password=password)
568+
if graph:
569+
return create_api_response("Success",message="Connection Successful")
570+
except Exception as e:
571+
job_status = "Failed"
572+
message="Connection Failed"
573+
error_message = str(e)
574+
logging.exception(f'Exception:{error_message}')
575+
return create_api_response(job_status,message=message,error=error_message)
532576

533577
def create_api_response(status,success_count=None,Failed_count=None, data=None, error=None,message=None,file_source=None,file_name=None):
534578
"""

frontend/src/App.css

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@
7272
display: flex;
7373
align-items: center;
7474
justify-content: center;
75-
height: calc(-477px + 100dvh);
7675
border-radius: 8px;
7776
outline-color: #d1d5db;
7877
}
@@ -122,4 +121,34 @@
122121
overflow: hidden;
123122
text-overflow: ellipsis;
124123
white-space: nowrap;
124+
}
125+
126+
.loader {
127+
width: 8px;
128+
height: 8px;
129+
border-radius: 50%;
130+
display: block;
131+
margin: 7px auto;
132+
position: relative;
133+
background: #FFF;
134+
box-shadow: -12px 0 #FFF, 12px 0 #FFF;
135+
box-sizing: border-box;
136+
animation: shadowPulse 2s linear infinite;
137+
}
138+
139+
@keyframes shadowPulse {
140+
33% {
141+
background: #FFF;
142+
box-shadow: -12px 0 rgb(var(--theme-palette-primary-bg-strong)), 12px 0 #FFF;
143+
}
144+
145+
66% {
146+
background: rgb(var(--theme-palette-primary-bg-strong));
147+
box-shadow: -12px 0 #FFF, 12px 0 #FFF;
148+
}
149+
150+
100% {
151+
background: #FFF;
152+
box-shadow: -12px 0 #FFF, 12px 0 rgb(var(--theme-palette-primary-bg-strong));
153+
}
125154
}
Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
{
2-
"listMessages": [
1+
{
2+
"listMessages": [
33
{
44
"id": 1,
55
"message": "Hi, I need help with creating a Cypher query for Neo4j.",
@@ -8,7 +8,7 @@
88
},
99
{
1010
"id": 2,
11-
"message": "Hi there! Welcome to Neo4j Chat! Your insights are just a click away from completed files",
11+
"message": " Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.",
1212
"user": "chatbot",
1313
"datetime": "01/01/2024 00:00:00"
1414
},
@@ -20,8 +20,7 @@
2020
},
2121
{
2222
"id": 4,
23-
"message":
24-
"Alright, you can use the following query: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN e.name`. This query matches nodes labeled 'Employee' related to the 'IT' department and returns their names.",
23+
"message": "Alright, you can use the following query: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN e.name`. This query matches nodes labeled 'Employee' related to the 'IT' department and returns their names.",
2524
"user": "chatbot",
2625
"datetime": "01/01/2024 00:00:00"
2726
},
@@ -33,10 +32,9 @@
3332
},
3433
{
3534
"id": 6,
36-
"message":
37-
"To get the count, use: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN count(e)`. This counts all the distinct 'Employee' nodes related to the 'IT' department.",
35+
"message": "To get the count, use: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN count(e)`. This counts all the distinct 'Employee' nodes related to the 'IT' department.",
3836
"user": "chatbot",
3937
"datetime": "01/01/2024 00:00:00"
4038
}
41-
]
42-
}
39+
]
40+
}

frontend/src/assets/images/Wikipedia-logo-v2.svg

Lines changed: 1 addition & 0 deletions
Loading

0 commit comments

Comments
 (0)