This repository was archived by the owner on May 27, 2025. It is now read-only.

Commit d1f2c11

Enable app insights (#79)
1 parent 61aff40 commit d1f2c11

20 files changed: +695 −593 lines

backend/src/api/experimental.py

Lines changed: 9 additions & 2 deletions
@@ -4,6 +4,7 @@
 import inspect
 import json
 import os
+import traceback
 from queue import Queue
 from threading import Thread
 
@@ -29,6 +30,7 @@
 from src.api.query import _is_index_complete, _reformat_context_data
 from src.meta_agent.global_search.retrieve import GlobalSearchHelpers
 from src.models import GraphRequest
+from src.reporting import ReporterSingleton
 from src.utils import query as query_helper
 
 experimental_route = APIRouter(
@@ -188,6 +190,11 @@ def task():
             stream_response(report_df=report_df, query=request.query),
             media_type="application/json",
         )
-    except Exception:
-        # temporary logging of errors until reporters are in place
+    except Exception as e:
+        reporter = ReporterSingleton().get_instance()
+        reporter.on_error(
+            message="Error encountered while streaming global search response",
+            cause=e,
+            stack=traceback.format_exc(),
+        )
         raise HTTPException(status_code=500, detail=None)
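
The pattern above recurs throughout this commit: bind the exception, fetch the shared reporter, and hand on_error the message, the exception object, and a formatted stack trace before re-raising an opaque HTTP 500. A minimal runnable sketch of that pattern follows; the ReporterSingleton interface is assumed from the diff, and the print-based reporter is only a stand-in for the repo's App Insights-backed implementation.

import traceback


class ReporterSingleton:
    """Minimal stand-in for the repo's shared reporter (interface assumed from the diff)."""

    _instance = None

    def get_instance(self):
        # lazily create and reuse one reporter for the whole process
        if ReporterSingleton._instance is None:
            ReporterSingleton._instance = self
        return ReporterSingleton._instance

    def on_error(self, message, cause=None, stack=None, details=None):
        # a real implementation would forward this record to Application Insights
        print({"message": message, "cause": repr(cause), "stack": stack, "details": details})


try:
    raise RuntimeError("boom")  # placeholder for the streaming search call
except Exception as e:
    reporter = ReporterSingleton().get_instance()
    reporter.on_error(
        message="Error encountered while streaming global search response",
        cause=e,
        stack=traceback.format_exc(),
    )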

backend/src/api/index.py

Lines changed: 41 additions & 32 deletions
@@ -4,6 +4,7 @@
 import asyncio
 import inspect
 import os
+import traceback
 from typing import cast
 
 import yaml
@@ -98,7 +99,7 @@ async def setup_indexing_pipeline(
     if not _blob_service_client.get_container_client(sanitized_storage_name).exists():
         raise HTTPException(
             status_code=500,
-            detail=f"Data container '{storage_name}' does not exist.",
+            detail=f"Storage blob container {storage_name} does not exist",
         )
 
     # check for prompts
@@ -202,7 +203,7 @@ async def setup_indexing_pipeline(
                 status_code=500,
                 detail="exception when calling BatchV1Api->create_namespaced_job",
             )
-        return BaseResponse(status="indexing operation has been scheduled.")
+        return BaseResponse(status="Indexing operation scheduled")
     except Exception:
         reporter = ReporterSingleton().get_instance()
         job_details = {
@@ -215,7 +216,7 @@ async def setup_indexing_pipeline(
         )
         raise HTTPException(
             status_code=500,
-            detail=f"Error occurred during setup of indexing job for '{index_name}'.",
+            detail=f"Error occurred during setup of indexing job for index {index_name}",
         )
 
 
@@ -232,19 +233,6 @@ async def _start_indexing_pipeline(index_name: str):
     # download nltk dependencies
     bootstrap()
 
-    # create new reporters/callbacks just for this job
-    reporters = []
-    reporter_names = os.getenv("REPORTERS", Reporters.CONSOLE.name.upper()).split(",")
-    for reporter_name in reporter_names:
-        try:
-            reporters.append(Reporters[reporter_name.upper()])
-        except KeyError:
-            raise ValueError(f"Found unknown reporter: {reporter_name}")
-
-    workflow_callbacks = load_pipeline_reporter(
-        reporting_dir=sanitized_index_name, reporters=reporters
-    )
-
     # load custom pipeline settings
     this_directory = os.path.dirname(
         os.path.abspath(inspect.getfile(inspect.currentframe()))
@@ -295,7 +283,22 @@ async def _start_indexing_pipeline(index_name: str):
     for workflow in pipeline_config.workflows:
         pipeline_job.all_workflows.append(workflow.name)
 
-    # add pipeline_job callback to the callback manager
+    # create new reporters/callbacks just for this job
+    reporters = []
+    reporter_names = os.getenv("REPORTERS", Reporters.CONSOLE.name.upper()).split(",")
+    for reporter_name in reporter_names:
+        try:
+            reporters.append(Reporters[reporter_name.upper()])
+        except KeyError:
+            raise ValueError(f"Unknown reporter type: {reporter_name}")
+    workflow_callbacks = load_pipeline_reporter(
+        index_name=index_name,
+        num_workflow_steps=len(pipeline_job.all_workflows),
+        reporting_dir=sanitized_index_name,
+        reporters=reporters,
+    )
+
+    # add pipeline job callback to the callback manager
     cast(WorkflowCallbacksManager, workflow_callbacks).register(
         PipelineJobWorkflowCallbacks(pipeline_job)
     )
@@ -312,6 +315,7 @@ async def _start_indexing_pipeline(index_name: str):
             # if the workflow failed, record the failure
             pipeline_job.failed_workflows.append(workflow_result.workflow)
             pipeline_job.update_db()
+            # TODO: exit early if a workflow fails and add more detailed error logging
 
     # if job is done, check if any workflow steps failed
     if len(pipeline_job.failed_workflows) > 0:
@@ -327,38 +331,43 @@ async def _start_indexing_pipeline(index_name: str):
         )
 
         workflow_callbacks.on_log(
-            f"Index Name: {index_name}, Container Name: {storage_name}\n",
-            details={"status_message": "Indexing pipeline complete."},
+            message=f"Indexing pipeline complete for index {index_name}.",
+            details={
+                "index": index_name,
+                "storage_name": storage_name,
+                "status_message": "indexing pipeline complete",
+            },
         )
 
         del workflow_callbacks  # garbage collect
         if pipeline_job.status == PipelineJobState.FAILED:
             exit(1)  # signal to AKS that indexing job failed
 
-    except Exception:
+    except Exception as e:
         pipeline_job.status = PipelineJobState.FAILED
 
         # update failed state in cosmos db
         error_details = {
-            "error_message": "Indexing pipeline failed.",
+            "index": index_name,
+            "storage_name": storage_name,
         }
         # log error in local index directory logs
        workflow_callbacks.on_error(
-            message=f"Index Name: {index_name}, Container Name: {storage_name}\n",
-            cause=None,
-            stack=None,
+            message=f"Indexing pipeline failed for index {index_name}.",
+            cause=e,
+            stack=traceback.format_exc(),
             details=error_details,
         )
         # log error in global index directory logs
         reporter.on_error(
-            f"Index Name: {index_name}, Container Name: {storage_name}\n {str(e)} \n",
-            cause=str(e),
-            stack=None,
+            message=f"Indexing pipeline failed for index {index_name}.",
+            cause=e,
+            stack=traceback.format_exc(),
             details=error_details,
         )
         raise HTTPException(
             status_code=500,
-            detail=f"Error occurred during indexing job for index '{index_name}'.",
+            detail=f"Error encountered during indexing job for index {index_name}.",
         )
 
 
@@ -437,8 +446,8 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
         batch_v1.delete_namespaced_job(name=job_name, namespace=namespace)
     except Exception:
         reporter.on_error(
-            f"Error deleting k8s job {job_name}.",
-            details={"Container": job_name},
+            message=f"Error deleting k8s job {job_name}.",
+            details={"container": job_name},
         )
         pass
     try:
@@ -448,8 +457,8 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
         core_v1.delete_namespaced_pod(job_pod, namespace=namespace)
     except Exception:
         reporter.on_error(
-            f"Error deleting k8s pod for job {job_name}.",
-            details={"Container": job_name},
+            message=f"Error deleting k8s pod for job {job_name}.",
+            details={"container": job_name},
        )
         pass
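
Two things changed in the reporter wiring above: it now runs after pipeline_job is populated, because load_pipeline_reporter takes num_workflow_steps=len(pipeline_job.all_workflows), and unknown names in the REPORTERS environment variable fail fast with a ValueError. A self-contained sketch of that enum lookup follows; the Reporters members here are hypothetical stand-ins for the repo's own enum.

import os
from enum import Enum


class Reporters(Enum):
    # hypothetical members; the repo defines its own set
    CONSOLE = 1
    BLOB = 2
    APP_INSIGHTS = 3


# e.g. REPORTERS="console,app_insights"; defaults to CONSOLE when unset
reporter_names = os.getenv("REPORTERS", Reporters.CONSOLE.name.upper()).split(",")

reporters = []
for reporter_name in reporter_names:
    try:
        # Enum[...] looks a member up by name, so a typo raises KeyError
        reporters.append(Reporters[reporter_name.upper()])
    except KeyError:
        raise ValueError(f"Unknown reporter type: {reporter_name}")

print(reporters)  # [<Reporters.CONSOLE: 1>] when REPORTERS is unset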

backend/src/api/query.py

Lines changed: 7 additions & 4 deletions
@@ -3,6 +3,7 @@
 
 import inspect
 import os
+import traceback
 
 import pandas as pd
 import yaml
@@ -131,9 +132,13 @@ async def global_query(request: GraphRequest):
         ]
 
         return GraphResponse(result=result.response, context_data=result.context_data)
-    except Exception:
+    except Exception as e:
         reporter = ReporterSingleton().get_instance()
-        reporter.on_error("Could not perform global search.")
+        reporter.on_error(
+            message="Could not perform global search.",
+            cause=e,
+            stack=traceback.format_exc(),
+        )
         raise HTTPException(status_code=500, detail=None)
 
 
@@ -361,8 +366,6 @@ async def local_query(request: GraphRequest):
         result = await search_engine.asearch(request.query)
 
         # post-process the search results, mapping the index_name,index_id to allow for provenance tracking
-
-        # reformat context data
         result.context_data = _reformat_context_data(result.context_data)
 
         # map title into index_name, index_id and title for provenance tracking

backend/src/main.py

Lines changed: 16 additions & 16 deletions
@@ -21,8 +21,23 @@
 from src.api.index_configuration import index_configuration_route
 from src.api.query import query_route
 from src.api.source import source_route
+from src.reporting import ReporterSingleton
+
+
+async def catch_all_exceptions_middleware(request: Request, call_next):
+    """a function to globally catch all exceptions and return a 500 response with the exception message"""
+    try:
+        return await call_next(request)
+    except Exception as e:
+        reporter = ReporterSingleton().get_instance()
+        reporter.on_error(
+            message="Unexpected internal server error",
+            cause=e,
+            stack=traceback.format_exc(),
+        )
+        return Response("Unexpected internal server error.", status_code=500)
+
 
-url = os.getenv("APIM_GATEWAY_URL", "localhost")
 version = os.getenv("GRAPHRAG_VERSION", "undefined_version")
 
 app = FastAPI(
@@ -31,19 +46,6 @@
     title="GraphRAG",
     version=version,
 )
-
-
-async def catch_all_exceptions_middleware(request: Request, call_next):
-    """a function to globally catch all exceptions and return a 500 response with the exception message"""
-    try:
-        return await call_next(request)
-    except Exception:
-        # only print stacktrace if developer has enabled debug mode
-        if os.getenv("DEBUG_MODE") == "on":  # possible values: on, off
-            print(traceback.format_exc())
-        return Response("Unexpected internal server error", status_code=500)
-
-
 app.middleware("http")(catch_all_exceptions_middleware)
 app.add_middleware(
     CORSMiddleware,
@@ -52,8 +54,6 @@ async def catch_all_exceptions_middleware(request: Request, call_next):
     allow_methods=["*"],
     allow_headers=["*"],
 )
-
-
 app.include_router(data_route)
 app.include_router(index_route)
 app.include_router(query_route)
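
With this change the catch-all middleware reports every unhandled exception through the shared reporter instead of printing a stack trace only when DEBUG_MODE is on, and the unused APIM_GATEWAY_URL lookup is dropped. A runnable sketch of registering such a middleware in FastAPI is below; stdlib logging stands in for ReporterSingleton, which is internal to this repo.

import logging
import traceback

from fastapi import FastAPI, Request, Response

logger = logging.getLogger("graphrag")

app = FastAPI(title="GraphRAG")


async def catch_all_exceptions_middleware(request: Request, call_next):
    """Globally catch unhandled exceptions and return an opaque 500 response."""
    try:
        return await call_next(request)
    except Exception:
        # stand-in for reporter.on_error(...): keep the stack trace server-side
        logger.error("Unexpected internal server error\n%s", traceback.format_exc())
        return Response("Unexpected internal server error.", status_code=500)


# equivalent to decorating the function with @app.middleware("http")
app.middleware("http")(catch_all_exceptions_middleware)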
