Skip to content

Commit 0996df9

Browse files
Merge pull request #86 from neo4j-labs/create_source_node_youtube
Youtube option Support
2 parents 77fba93 + 9ab7544 commit 0996df9

30 files changed

+960
-618
lines changed

backend/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,5 @@ zipp==3.17.0
180180
poppler-utils
181181
pytesseract
182182
pdf2image
183+
youtube-transcript-api
184+
pytube

backend/score.py

Lines changed: 57 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,12 @@ def healthy_condition():
1111
output = {"healthy": True}
1212
return output
1313

14-
1514
def healthy():
1615
return True
1716

18-
1917
def sick():
2018
return False
2119

22-
2320
app = FastAPI()
2421

2522
app.add_middleware(
@@ -31,10 +28,9 @@ def sick():
3128
)
3229
app.add_api_route("/health", health([healthy_condition, healthy]))
3330

34-
3531
@app.post("/sources")
3632
async def create_source_knowledge_graph(
37-
uri=Form(), userName=Form(), password=Form(), file: UploadFile = File(...)
33+
uri=Form(), database=Form(), userName=Form(), password=Form(), file: UploadFile = File(...), model=Form()
3834
):
3935
"""
4036
Calls 'create_source_node_graph' function in a new thread to create
@@ -49,42 +45,42 @@ async def create_source_knowledge_graph(
4945
Returns:
5046
'Source' Node creation in Neo4j database
5147
"""
52-
try:
53-
result = await asyncio.to_thread(
54-
create_source_node_graph, uri, userName, password, file
55-
)
56-
return result
57-
except Exception as e:
58-
job_status = "Failure"
59-
error_message = str(e)
60-
logging.exception(f"Exception Stack trace:{e}")
61-
return create_api_response(job_status, error=error_message)
62-
48+
result = await asyncio.to_thread(
49+
create_source_node_graph_local_file, uri, database, userName, password, file, model
50+
)
51+
return result
6352

64-
@app.post("/bucket/scan")
65-
async def create_source_knowledge_graph(
53+
@app.post("/url/scan")
54+
async def create_source_knowledge_graph_url(
6655
uri=Form(),
56+
database=Form(),
6757
userName=Form(),
6858
password=Form(),
69-
s3_url_dir=Form(),
59+
source_url=Form(),
7060
aws_access_key_id=Form(None),
7161
aws_secret_access_key=Form(None),
62+
max_limit=Form(5),
63+
query_source=Form(None),
64+
model=Form(None)
7265
):
73-
return create_source_node_graph_s3(
74-
uri, userName, password, s3_url_dir, aws_access_key_id, aws_secret_access_key
66+
return create_source_node_graph_url(
67+
uri, database, userName, password, source_url, max_limit, query_source, model, aws_access_key_id, aws_secret_access_key
7568
)
7669

7770

7871
@app.post("/extract")
7972
async def extract_knowledge_graph_from_file(
8073
uri=Form(),
74+
database=Form(),
8175
userName=Form(),
8276
password=Form(),
8377
file: UploadFile = File(None),
8478
model=Form(),
85-
s3_url=Form(None),
79+
source_url=Form(None),
8680
aws_access_key_id=Form(None),
8781
aws_secret_access_key=Form(None),
82+
wiki_query=Form(None),
83+
max_sources=Form(None),
8884
):
8985
"""
9086
Calls 'extract_graph_from_file' in a new thread to create Neo4jGraph from a
@@ -100,65 +96,54 @@ async def extract_knowledge_graph_from_file(
10096
Returns:
10197
Nodes and Relations created in Neo4j database for the pdf file
10298
"""
103-
try:
104-
if file:
105-
return await asyncio.to_thread(
106-
extract_graph_from_file,
107-
uri,
108-
userName,
109-
password,
110-
model,
111-
file=file,
112-
s3_url=None,
113-
)
114-
elif s3_url:
115-
return await asyncio.to_thread(
116-
extract_graph_from_file,
117-
uri,
118-
userName,
119-
password,
120-
model,
121-
s3_url=s3_url,
122-
aws_access_key_id=aws_access_key_id,
123-
aws_secret_access_key=aws_secret_access_key,
124-
)
125-
else:
126-
return {"job_status": "Failure", "error": "No file found"}
127-
except Exception as e:
128-
job_status = "Failure"
129-
error_message = str(e)
130-
logging.exception(f"Exception Stack trace:{e}")
131-
return create_api_response(job_status, error=error_message)
132-
99+
100+
if file:
101+
return await asyncio.to_thread(
102+
extract_graph_from_file,
103+
uri,
104+
database,
105+
userName,
106+
password,
107+
model,
108+
file=file,
109+
source_url=None,
110+
wiki_query=wiki_query,
111+
max_sources=max_sources,
112+
)
113+
elif source_url:
114+
return await asyncio.to_thread(
115+
extract_graph_from_file,
116+
uri,
117+
database,
118+
userName,
119+
password,
120+
model,
121+
source_url=source_url,
122+
aws_access_key_id=aws_access_key_id,
123+
aws_secret_access_key=aws_secret_access_key,
124+
wiki_query=wiki_query,
125+
max_sources=max_sources,
126+
)
127+
else:
128+
return {"job_status": "Failure", "error": "No file found"}
129+
133130

134131
@app.get("/sources_list")
135-
async def get_source_list():
132+
async def get_source_list(uri=Form(),database=Form(),userName=Form(),password=Form()):
136133
"""
137134
Calls 'get_source_list_from_graph', which returns the list of sources that already exist in the database
138135
"""
139-
try:
140-
result = await asyncio.to_thread(get_source_list_from_graph)
141-
return result
142-
except Exception as e:
143-
job_status = "Failure"
144-
error_message = str(e)
145-
logging.exception(f"Exception Stack trace:{e}")
146-
return create_api_response(job_status, error=error_message)
136+
result = await asyncio.to_thread(get_source_list_from_graph,uri,database,userName,password)
137+
return result
147138

148139
@app.post("/update_similarity_graph")
149140
async def update_similarity_graph():
150141
"""
151142
Calls 'update_graph', which posts the query to update the similar nodes in the graph
152143
"""
153-
try:
154-
result = await asyncio.to_thread(update_graph)
155-
return result
156-
except Exception as e:
157-
job_status = "Failure"
158-
error_message = str(e)
159-
logging.exception(f"Exception Stack trace:{e}")
160-
return create_api_response(job_status, error=error_message)
161-
162-
144+
145+
result = await asyncio.to_thread(update_graph)
146+
return result
147+
163148
if __name__ == "__main__":
164-
uvicorn.run(app)
149+
uvicorn.run(app)

backend/src/diffbot_transformer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,14 @@ def extract_graph_from_diffbot(graph: Neo4jGraph,
3131
graph_document_list = []
3232

3333
logging.info(f"create relationship between source,chunk and entity nodes created from Diffbot")
34-
for chunk in chunks:
34+
for i,chunk in enumerate(chunks):
35+
if i == 0:
36+
firstChunk = True
37+
else:
38+
firstChunk = False
3539
graph_document = diffbot_nlp.convert_to_graph_documents([chunk])
3640
graph.add_graph_documents(graph_document)
37-
create_source_chunk_entity_relationship(file_name,graph,graph_document,chunk,uri,userName,password)
41+
create_source_chunk_entity_relationship(file_name,graph,graph_document,chunk,uri,userName,password,firstChunk)
3842
graph_document_list.append(graph_document[0])
3943

4044
graph.refresh_schema()

0 commit comments

Comments
 (0)