Skip to content

Commit 1e1fb77

Browse files
Issues Fixed
1 parent dab9c4f commit 1e1fb77

File tree

4 files changed

+218
-370
lines changed

4 files changed

+218
-370
lines changed

backend/score.py

Lines changed: 63 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from src.main import *
66
from src.QA_integration import *
77
from src.entities.user_credential import user_credential
8+
from src.shared.common_fn import *
89
import uvicorn
910
import asyncio
1011
import base64
@@ -36,37 +37,37 @@ def sick():
3637

3738
app.add_api_route("/health", health([healthy_condition, healthy]))
3839

39-
@app.post("/sources")
40-
async def create_source_knowledge_graph(
41-
uri=Form(None), userName=Form(None), password=Form(None), file: UploadFile = File(...), model=Form(),database=Form(None),
42-
):
43-
"""
44-
Calls 'create_source_node_graph' function in a new thread to create
45-
source node in Neo4jGraph when a new file is uploaded.
40+
# @app.post("/sources")
41+
# async def create_source_knowledge_graph(
42+
# uri=Form(None), userName=Form(None), password=Form(None), file: UploadFile = File(...), model=Form(),database=Form(None),
43+
# ):
44+
# """
45+
# Calls 'create_source_node_graph' function in a new thread to create
46+
# source node in Neo4jGraph when a new file is uploaded.
4647

47-
Args:
48-
uri: URI of Graph Service to connect to
49-
userName: Username to connect to Graph Service with ( default : None )
50-
password: Password to connect to Graph Service with ( default : None )
51-
file: File object containing the PDF file
48+
# Args:
49+
# uri: URI of Graph Service to connect to
50+
# userName: Username to connect to Graph Service with ( default : None )
51+
# password: Password to connect to Graph Service with ( default : None )
52+
# file: File object containing the PDF file
5253

53-
Returns:
54-
'Source' Node creation in Neo4j database
55-
"""
56-
try:
57-
result = await asyncio.to_thread(
58-
create_source_node_graph_local_file, uri, userName, password, file, model, database
59-
)
60-
return create_api_response("Success",message="Source Node created successfully",file_source=result.file_source, file_name=result.file_name)
61-
except Exception as e:
62-
# obj_source_node = sourceNode()
63-
job_status = "Failed"
64-
message = "Unable to create source node"
65-
error_message = str(e)
66-
logging.error(f"Error in creating document node: {error_message}")
67-
#update exception in source node
68-
# obj_source_node.update_exception_db(file.filename, error_message)
69-
return create_api_response(job_status, message=message,error=error_message,file_source='local file',file_name=file.filename)
54+
# Returns:
55+
# 'Source' Node creation in Neo4j database
56+
# """
57+
# try:
58+
# result = await asyncio.to_thread(
59+
# create_source_node_graph_local_file, uri, userName, password, file, model, database
60+
# )
61+
# return create_api_response("Success",message="Source Node created successfully",file_source=result.file_source, file_name=result.file_name)
62+
# except Exception as e:
63+
# # obj_source_node = sourceNode()
64+
# job_status = "Failed"
65+
# message = "Unable to create source node"
66+
# error_message = str(e)
67+
# logging.error(f"Error in creating document node: {error_message}")
68+
# #update exception in source node
69+
# # obj_source_node.update_exception_db(file.filename, error_message)
70+
# return create_api_response(job_status, message=message,error=error_message,file_source='local file',file_name=file.filename)
7071

7172
@app.post("/url/scan")
7273
async def create_source_knowledge_graph_url(
@@ -84,22 +85,23 @@ async def create_source_knowledge_graph_url(
8485
source_type=Form(None)
8586
):
8687
try:
88+
graph = create_graph_database_connection(uri, userName, password, database)
89+
graphDb_data_Access = graphDBdataAccess(graph)
8790
if source_type == 's3 bucket' and aws_access_key_id and aws_secret_access_key:
88-
lst_file_name,success_count,failed_count = create_source_node_graph_url_s3(
89-
uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, source_type
91+
lst_file_name,success_count,failed_count = create_source_node_graph_url_s3(graph, model, source_url, aws_access_key_id, aws_secret_access_key, source_type
9092
)
9193
elif source_type == 'gcs bucket':
92-
lst_file_name,success_count,failed_count = create_source_node_graph_url_gcs(
93-
uri, userName, password, database, model, source_url, gcs_bucket_name, gcs_bucket_folder, source_type
94+
lst_file_name,success_count,failed_count = create_source_node_graph_url_gcs(graph, model, source_url, gcs_bucket_name, gcs_bucket_folder, source_type
9495
)
9596
elif source_type == 'youtube':
96-
lst_file_name,success_count,failed_count = create_source_node_graph_url_youtube(
97-
uri, userName, password, database, model, source_url, source_type
97+
lst_file_name,success_count,failed_count = create_source_node_graph_url_youtube(graph, model, source_url, source_type
9898
)
9999
elif source_type == 'Wikipedia':
100-
lst_file_name,success_count,failed_count = create_source_node_graph_url_wikipedia(
101-
uri, userName, password, database, model, wiki_query, source_type
100+
lst_file_name,success_count,failed_count = create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type
102101
)
102+
else:
103+
return create_api_response('Failed',message='source_type is other than accepted source')
104+
103105
if source_url is not None:
104106
source = source_url
105107
else:
@@ -108,11 +110,10 @@ async def create_source_knowledge_graph_url(
108110
message = f"Source Node created successfully for source type: {source_type} and source: {source}"
109111
return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name)
110112
except Exception as e:
111-
job_status = "Failed"
112-
message = f"Unable to create source node for source type: {source_type} and source: {source_url}{wiki_query}"
113+
message = f"Unable to create source node for source type: {source_type} and source: {source}"
113114
error_message = str(e)
114115
logging.exception(f'Exception Stack trace:')
115-
return create_api_response(job_status,message=message,error=error_message,file_source=source_type)
116+
return create_api_response('Failed',message=message,error=error_message,file_source=source_type)
116117

117118

118119
@app.post("/extract")
@@ -122,7 +123,6 @@ async def extract_knowledge_graph_from_file(
122123
password=Form(None),
123124
model=Form(None),
124125
database=Form(None),
125-
file: UploadFile = File(None),
126126
source_url=Form(None),
127127
aws_access_key_id=Form(None),
128128
aws_secret_access_key=Form(None),
@@ -149,35 +149,38 @@ async def extract_knowledge_graph_from_file(
149149
Nodes and Relations created in Neo4j database for the pdf file
150150
"""
151151
try:
152+
graph = create_graph_database_connection(uri, userName, password, database)
153+
graphDb_data_Access = graphDBdataAccess(graph)
152154
if source_type == 'local file':
153-
return await asyncio.to_thread(
154-
extract_graph_from_file_local_file, uri, userName, password, model, database,file_name, file=file )
155+
result = await asyncio.to_thread(
156+
extract_graph_from_file_local_file, graph, model, file_name)
155157

156158
elif source_type == 's3 bucket' and source_url:
157159
result = await asyncio.to_thread(
158-
extract_graph_from_file_s3, uri, userName, password, model, database,
159-
source_url, aws_access_key_id, aws_secret_access_key)
160+
extract_graph_from_file_s3, graph, model, source_url, aws_access_key_id, aws_secret_access_key)
160161

161162
elif source_type == 'youtube' and source_url:
162163
result = await asyncio.to_thread(
163-
extract_graph_from_file_youtube, uri, userName, password, model, database, source_url)
164+
extract_graph_from_file_youtube, graph, model, source_url)
164165

165166
elif source_type == 'Wikipedia' and wiki_query:
166167
result = await asyncio.to_thread(
167-
extract_graph_from_file_Wikipedia, uri, userName, password, model, database, wiki_query, max_sources)
168+
extract_graph_from_file_Wikipedia, graph, model, wiki_query, max_sources)
168169

169170
elif source_type == 'gcs bucket' and gcs_bucket_name:
170171
result = await asyncio.to_thread(
171-
extract_graph_from_file_gcs, uri, userName, password, model, database,
172-
gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename)
172+
extract_graph_from_file_gcs, graph, model, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename)
173+
else:
174+
return create_api_response('Failed',message='source_type is other than accepted source')
173175

174-
return create_api_response('Success',data=result)
176+
return create_api_response('Success', data=result)
175177
except Exception as e:
176-
message=f"Failed To Process File or LLM Unable To Parse Content"
177-
job_status = "Failed"
178-
error_message = str(e)
179-
return create_api_response(job_status,message=message,error=error_message)
180-
178+
message=f"Failed To Process File:{file_name} or LLM Unable To Parse Content"
179+
logging.info(message)
180+
error_message = str(e)
181+
graphDb_data_Access.update_exception_db(file_name,error_message)
182+
logging.exception(f'Exception Stack trace: {error_message}')
183+
return create_api_response('Failed', message=message, error=error_message, file_name = file_name)
181184

182185
@app.get("/sources_list")
183186
async def get_source_list(uri:str, userName:str, password:str, database:str=None):
@@ -195,7 +198,7 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None
195198
message="Unable to fetch source list"
196199
error_message = str(e)
197200
logging.exception(f'Exception:{error_message}')
198-
return create_api_response(job_status,message=message,error=error_message)
201+
return create_api_response(job_status, message=message, error=error_message)
199202

200203
@app.post("/update_similarity_graph")
201204
async def update_similarity_graph(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None)):
@@ -211,7 +214,7 @@ async def update_similarity_graph(uri=Form(None), userName=Form(None), password=
211214
message="Unable to update KNN Graph"
212215
error_message = str(e)
213216
logging.exception(f'Exception in update KNN graph:{error_message}')
214-
return create_api_response(job_status,message=message,error=error_message)
217+
return create_api_response(job_status, message=message, error=error_message)
215218

216219
@app.post("/chat_bot")
217220
async def chat_bot(uri=Form(None), userName=Form(None), password=Form(None), question=Form(None), session_id=Form(None)):
@@ -223,7 +226,7 @@ async def chat_bot(uri=Form(None), userName=Form(None), password=Form(None), que
223226
message="Unable to get chat response"
224227
error_message = str(e)
225228
logging.exception(f'Exception in chat bot:{error_message}')
226-
return create_api_response(job_status,message=message,error=error_message)
229+
return create_api_response(job_status, message=message, error=error_message)
227230

228231
@app.post("/connect")
229232
async def connect(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None)):
@@ -235,15 +238,15 @@ async def connect(uri=Form(None), userName=Form(None), password=Form(None), data
235238
message="Connection failed to connect Neo4j database"
236239
error_message = str(e)
237240
logging.exception(f'Connection failed to connect Neo4j database:{error_message}')
238-
return create_api_response(job_status,message=message,error=error_message)
241+
return create_api_response(job_status, message=message, error=error_message)
239242

240243
@app.post("/upload")
241244
async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber=Form(None), totalChunks=Form(None),
242245
originalname=Form(None), model=Form(None), uri=Form(None), userName=Form(None),
243246
password=Form(None), database=Form(None)):
244247
try:
245248
result = await asyncio.to_thread(upload_file,uri,userName,password,database,model,file,chunkNumber,totalChunks,originalname)
246-
return create_api_response('Success',message=result)
249+
return create_api_response('Success', message=result)
247250
except Exception as e:
248251
job_status = "Failed"
249252
message="Unable to upload large file into chunks or saving the chunks"

backend/src/document_sources/local_file.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,25 @@
44
from tempfile import NamedTemporaryFile
55
from langchain_community.document_loaders import PyPDFLoader
66

7-
def get_documents_from_file_by_bytes(file):
8-
file_name = file.filename
9-
logging.info(f"get_documents_from_file called for filename = {file_name}")
10-
suffix = Path(file.filename).suffix
11-
with NamedTemporaryFile(delete=True, suffix=suffix) as tmp:
12-
shutil.copyfileobj(file.file, tmp)
13-
tmp_path = Path(tmp.name)
14-
loader = PyPDFLoader(str(tmp_path))
15-
pages = loader.load_and_split()
16-
return file_name, pages
7+
# def get_documents_from_file_by_bytes(file):
8+
# file_name = file.filename
9+
# logging.info(f"get_documents_from_file called for filename = {file_name}")
10+
# suffix = Path(file.filename).suffix
11+
# with NamedTemporaryFile(delete=True, suffix=suffix) as tmp:
12+
# shutil.copyfileobj(file.file, tmp)
13+
# tmp_path = Path(tmp.name)
14+
# loader = PyPDFLoader(str(tmp_path))
15+
# pages = loader.load_and_split()
16+
# return file_name, pages
1717

1818
def get_documents_from_file_by_path(file_path,file_name):
19-
20-
loader = PyPDFLoader(file_path)
21-
pages = loader.load_and_split()
19+
file_path = Path(file_path)
20+
if file_path.exists():
21+
logging.info(f'file {file_name} processing')
22+
loader = PyPDFLoader(file_path)
23+
pages = loader.load_and_split()
24+
else:
25+
logging.info(f'File {file_name} does not exist')
26+
raise Exception(f'File {file_name} does not exist')
27+
2228
return file_name, pages

0 commit comments

Comments
 (0)