This repository was archived by the owner on May 27, 2025. It is now read-only.

Commit a77806f
update successful response status code
1 parent 591950a

File tree: 12 files changed (+812, -444 lines)

backend/graphrag_app/api/data.py

Lines changed: 20 additions & 19 deletions
```diff
@@ -14,6 +14,7 @@
     Depends,
     HTTPException,
     UploadFile,
+    status,
 )
 from markitdown import MarkItDown, StreamInfo

@@ -98,7 +99,7 @@ async def upload_file_async(
             stream_info=stream_info,
         )

-        # clean the output and upload to blob storage
+        # remove illegal unicode characters and upload to blob storage
         cleaned_result = clean_output(result.text_content)
         await converted_blob_client.upload_blob(
             cleaned_result, overwrite=overwrite
@@ -107,11 +108,12 @@ async def upload_file_async(
         # update the file cache
         await update_cache(filename, file_stream, container_client)
     except Exception:
-        pass
+        # if any exception occurs, return the filename to indicate conversion/upload failures
+        return upload_file.filename


 def clean_output(val: str, replacement: str = ""):
-    """Remove illegal XML characters from a string."""
+    """Removes unicode characters that are invalid XML characters (not valid for graphml files at least)."""
     # fmt: off
     _illegal_xml_chars_RE = re.compile(
         "[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]"
```
```diff
@@ -124,7 +126,7 @@ def clean_output(val: str, replacement: str = ""):
     "",
     summary="Upload data to a data storage container",
     response_model=BaseResponse,
-    responses={200: {"model": BaseResponse}},
+    responses={status.HTTP_201_CREATED: {"model": BaseResponse}},
 )
 async def upload_files(
     files: List[UploadFile],
@@ -133,18 +135,8 @@ async def upload_files(
     overwrite: bool = True,
 ):
     """
-    Create an Azure Storage container and upload files to it.
-
-    Args:
-        files (List[UploadFile]): A list of files to be uploaded.
-        storage_name (str): The name of the Azure Blob Storage container to which files will be uploaded.
-        overwrite (bool): Whether to overwrite existing files with the same name. Defaults to True. If False, files that already exist will be skipped.
-
-    Returns:
-        BaseResponse: An instance of the BaseResponse model with a status message indicating the result of the upload.
-
-    Raises:
-        HTTPException: If the container name is invalid or if any error occurs during the upload process.
+    Create an Azure Storage container (if needed) and upload files. Multiple file types are supported, including pdf, powerpoint, word, excel, html, csv, json, xml, etc.
+    The complete set of supported file types can be found in the MarkItDown (https://github.com/microsoft/markitdown) library.
     """
     try:
         # create the initial cache if it doesn't exist
@@ -153,16 +145,19 @@ async def upload_files(
         )
         await create_cache(blob_container_client)

-        # upload files in batches of 1000 to avoid exceeding Azure Storage API limits
-        batch_size = 1000
+        # upload files in batches of 100 to avoid exceeding Azure Storage API limits
+        processing_errors = []
+        batch_size = 100
         num_batches = ceil(len(files) / batch_size)
         for i in range(num_batches):
             batch_files = files[i * batch_size : (i + 1) * batch_size]
             tasks = [
                 upload_file_async(file, blob_container_client, overwrite)
                 for file in batch_files
             ]
-            await asyncio.gather(*tasks)
+            results = await asyncio.gather(*tasks)
+            results = [r for r in results if r is not None]
+            processing_errors.extend(results)

         # update container-store entry in cosmosDB once upload process is successful
         cosmos_container_store_client = get_cosmos_container_store_client()
@@ -171,6 +166,12 @@ async def upload_files(
             "human_readable_name": container_name,
             "type": "data",
         })
+
+        if len(processing_errors) > 0:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Error uploading files: {processing_errors}.",
+            )
         return BaseResponse(status="File upload successful.")
     except Exception as e:
         logger = load_pipeline_logger()
```
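The net effect of the data.py changes: upload_file_async now signals failure by returning the filename instead of silently passing, and upload_files collects those names per batch and turns them into a 400. Returning a sentinel instead of raising also means one bad file cannot abort the whole asyncio.gather. A self-contained sketch of that error-collection pattern, with the Azure blob details stubbed out and the failure condition purely hypothetical:

```python
import asyncio
from math import ceil

async def upload_file_async(filename: str) -> str | None:
    # stand-in for the real converter/uploader: per the diff, it returns
    # the filename on failure and (implicitly) None on success
    try:
        if filename.endswith(".bad"):  # hypothetical failure condition
            raise ValueError("conversion failed")
        return None
    except Exception:
        return filename

async def upload_files(files: list[str], batch_size: int = 100) -> list[str]:
    processing_errors: list[str] = []
    num_batches = ceil(len(files) / batch_size)
    for i in range(num_batches):
        batch_files = files[i * batch_size : (i + 1) * batch_size]
        tasks = [upload_file_async(f) for f in batch_files]
        results = await asyncio.gather(*tasks)
        processing_errors.extend(r for r in results if r is not None)
    return processing_errors

print(asyncio.run(upload_files(["a.pdf", "b.bad", "c.docx"])))  # -> ['b.bad']
```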

backend/graphrag_app/api/graph.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -8,6 +8,7 @@
     APIRouter,
     Depends,
     HTTPException,
+    status,
 )
 from fastapi.responses import StreamingResponse

@@ -31,6 +32,7 @@
     "/graphml/{container_name}",
     summary="Retrieve a GraphML file of the knowledge graph",
     response_description="GraphML file successfully downloaded",
+    status_code=status.HTTP_200_OK,
 )
 async def get_graphml_file(
     container_name, sanitized_container_name: str = Depends(sanitize_name)
```
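A note on the two decorator parameters this commit touches: in FastAPI, `status_code=...` sets the status code actually returned by the route on success, while the `responses={...}` mapping only documents codes and models in the generated OpenAPI schema. A minimal sketch showing the two together (the app and route here are hypothetical, not from this repo):

```python
from fastapi import FastAPI, status
from pydantic import BaseModel

app = FastAPI()

class BaseResponse(BaseModel):
    status: str

@app.post(
    "/upload",
    response_model=BaseResponse,
    status_code=status.HTTP_201_CREATED,  # the code actually sent on success
    responses={status.HTTP_201_CREATED: {"model": BaseResponse}},  # OpenAPI docs only
)
async def upload() -> BaseResponse:
    return BaseResponse(status="File upload successful.")
```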

backend/graphrag_app/api/index.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -12,6 +12,7 @@
     Depends,
     HTTPException,
     UploadFile,
+    status,
 )
 from kubernetes import (
     client as kubernetes_client,
@@ -49,7 +50,7 @@
     "",
     summary="Build an index",
     response_model=BaseResponse,
-    responses={200: {"model": BaseResponse}},
+    responses={status.HTTP_202_ACCEPTED: {"model": BaseResponse}},
 )
 async def schedule_index_job(
     storage_container_name: str,
@@ -142,7 +143,7 @@ async def schedule_index_job(
     "",
     summary="Get all index names",
     response_model=IndexNameList,
-    responses={200: {"model": IndexNameList}},
+    responses={status.HTTP_200_OK: {"model": IndexNameList}},
 )
 async def get_all_index_names(
     container_store_client=Depends(get_cosmos_container_store_client),
@@ -218,7 +219,7 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
     "/{container_name}",
     summary="Delete a specified index",
     response_model=BaseResponse,
-    responses={200: {"model": BaseResponse}},
+    responses={status.HTTP_200_OK: {"model": BaseResponse}},
 )
 async def delete_index(
     container_name: str,
@@ -267,6 +268,7 @@ async def delete_index(
     "/status/{container_name}",
     summary="Track the status of an indexing job",
     response_model=IndexStatusResponse,
+    status_code=status.HTTP_200_OK,
 )
 async def get_index_status(
     container_name: str, sanitized_container_name: str = Depends(sanitize_name)
```
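The index routes now advertise 202 Accepted for scheduling (the build runs in the background, e.g. as a Kubernetes job) and 200 OK for status checks, which implies the usual schedule-then-poll flow. A rough client sketch; the base URL, the router prefix, and the IndexStatusResponse field and value names are all assumptions, not taken from this diff:

```python
import time

import requests  # simple synchronous client, purely illustrative

BASE_URL = "http://localhost:8000/index"  # assumed host and router prefix

# POST "" schedules an index build; 202 Accepted = queued, not finished
resp = requests.post(BASE_URL, params={"storage_container_name": "mydata"})
resp.raise_for_status()

# GET /status/{container_name} tracks the background job
while True:
    job = requests.get(f"{BASE_URL}/status/mydata").json()  # an IndexStatusResponse
    print(job)
    if job.get("status") in {"complete", "failed"}:  # field/value names assumed
        break
    time.sleep(10)
```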

backend/graphrag_app/api/prompt_tuning.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -11,6 +11,7 @@
     APIRouter,
     Depends,
     HTTPException,
+    status,
 )
 from graphrag.config.create_graphrag_config import create_graphrag_config

@@ -27,6 +28,7 @@
     "/prompts",
     summary="Generate custom graphrag prompts based on user-provided data.",
     description="Generating custom prompts from user-provided data may take several minutes to run based on the amount of data used.",
+    status_code=status.HTTP_200_OK,
 )
 async def generate_prompts(
     container_name: str,
```

backend/graphrag_app/api/query.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -10,6 +10,7 @@
     APIRouter,
     Depends,
     HTTPException,
+    status,
 )
 from graphrag.api.query import global_search, local_search
 from graphrag.config.create_graphrag_config import create_graphrag_config
@@ -42,7 +43,7 @@
     summary="Perform a global search across the knowledge graph index",
     description="The global query method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole.",
     response_model=GraphResponse,
-    responses={200: {"model": GraphResponse}},
+    responses={status.HTTP_200_OK: {"model": GraphResponse}},
 )
 async def global_query(request: GraphRequest):
     # this is a slightly modified version of the graphrag.query.cli.run_global_search method
@@ -122,7 +123,7 @@ async def global_query(request: GraphRequest):
     summary="Perform a local search across the knowledge graph index.",
     description="The local query method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).",
     response_model=GraphResponse,
-    responses={200: {"model": GraphResponse}},
+    responses={status.HTTP_200_OK: {"model": GraphResponse}},
 )
 async def local_query(request: GraphRequest):
     index_name = request.index_name
```
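With the responses mapping now keyed by status.HTTP_200_OK, a cheap regression check is to hit the route with FastAPI's TestClient and assert the code. A sketch; the app import path, the router prefix, and GraphRequest's query field are assumptions (only index_name is visible in this diff):

```python
from fastapi.testclient import TestClient

from graphrag_app.main import app  # assumed location of the FastAPI app

client = TestClient(app)

def test_global_query_returns_200():
    # "index_name" appears in this diff; "query" is assumed for illustration
    payload = {"index_name": "myindex", "query": "What themes span the dataset?"}
    response = client.post("/query/global", json=payload)  # assumed prefix
    assert response.status_code == 200
    response.json()  # should parse as the documented GraphResponse model
```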

backend/graphrag_app/api/query_streaming.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -12,6 +12,7 @@
     APIRouter,
     Depends,
     HTTPException,
+    status,
 )
 from fastapi.responses import StreamingResponse
 from graphrag.api.query import (
@@ -47,6 +48,7 @@
     "/global",
     summary="Stream a response back after performing a global search",
     description="The global query method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole.",
+    status_code=status.HTTP_200_OK,
 )
 async def global_search_streaming(request: GraphRequest):
     # this is a slightly modified version of graphrag_app.api.query.global_query() method
@@ -204,6 +206,7 @@ async def global_search_streaming(request: GraphRequest):
     "/local",
     summary="Stream a response back after performing a local search",
     description="The local query method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).",
+    status_code=status.HTTP_200_OK,
 )
 async def local_search_streaming(request: GraphRequest):
     # this is a slightly modified version of graphrag_app.api.query.local_query() method
```
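For the streaming routes, `status_code=status.HTTP_200_OK` pairs with the StreamingResponse imported at the top of this file: the status line and headers go out when the response starts, and body chunks follow as the search produces them. A minimal sketch of that combination, with a stand-in token generator (the real routes stream search output):

```python
from typing import AsyncGenerator

from fastapi import FastAPI, status
from fastapi.responses import StreamingResponse

app = FastAPI()

async def fake_search_stream() -> AsyncGenerator[str, None]:
    # stand-in for the incremental output of a global/local search
    for chunk in ["partial ", "answer ", "tokens"]:
        yield chunk

@app.post("/global", status_code=status.HTTP_200_OK)
async def global_search_streaming():
    # the status code is fixed before the first chunk is sent;
    # errors mid-stream can no longer change it
    return StreamingResponse(fake_search_stream(), media_type="text/plain")
```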

backend/graphrag_app/api/source.py

Lines changed: 11 additions & 6 deletions
```diff
@@ -5,7 +5,12 @@
 import traceback

 import pandas as pd
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import (
+    APIRouter,
+    Depends,
+    HTTPException,
+    status,
+)

 from graphrag_app.logger.load_logger import load_pipeline_logger
 from graphrag_app.typing.models import (
@@ -43,7 +48,7 @@
     "/report/{container_name}/{report_id}",
     summary="Return a single community report.",
     response_model=ReportResponse,
-    responses={200: {"model": ReportResponse}},
+    responses={status.HTTP_200_OK: {"model": ReportResponse}},
 )
 async def get_report_info(
     report_id: int,
@@ -88,7 +93,7 @@ async def get_report_info(
     "/text/{container_name}/{text_unit_id}",
     summary="Return a single base text unit.",
     response_model=TextUnitResponse,
-    responses={200: {"model": TextUnitResponse}},
+    responses={status.HTTP_200_OK: {"model": TextUnitResponse}},
 )
 async def get_chunk_info(
     text_unit_id: str,
@@ -148,7 +153,7 @@ async def get_chunk_info(
     "/entity/{container_name}/{entity_id}",
     summary="Return a single entity.",
     response_model=EntityResponse,
-    responses={200: {"model": EntityResponse}},
+    responses={status.HTTP_200_OK: {"model": EntityResponse}},
 )
 async def get_entity_info(
     entity_id: int,
@@ -190,7 +195,7 @@ async def get_entity_info(
     "/claim/{container_name}/{claim_id}",
     summary="Return a single claim.",
     response_model=ClaimResponse,
-    responses={200: {"model": ClaimResponse}},
+    responses={status.HTTP_200_OK: {"model": ClaimResponse}},
 )
 async def get_claim_info(
     claim_id: int,
@@ -240,7 +245,7 @@ async def get_claim_info(
     "/relationship/{container_name}/{relationship_id}",
     summary="Return a single relationship.",
     response_model=RelationshipResponse,
-    responses={200: {"model": RelationshipResponse}},
+    responses={status.HTTP_200_OK: {"model": RelationshipResponse}},
 )
 async def get_relationship_info(
     relationship_id: int,
```

backend/graphrag_app/utils/common.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -213,7 +213,7 @@ async def subscription_key_check(

 async def create_cache(container_client: ContainerClient) -> None:
     """
-    Create a file cache to track the uploaded files if it doesn't exist.
+    Create a file cache (csv) to track uploaded files.
     """
     try:
         cache_blob_client = container_client.get_blob_client("uploaded_files_cache.csv")
@@ -238,7 +238,9 @@ async def create_cache(container_client: ContainerClient) -> None:

 async def check_cache(file_stream: BinaryIO, container_client: ContainerClient) -> bool:
     """
-    Check if the file has already been uploaded.
+    Check a cache file to determine if a file has previously been uploaded.
+
+    Note: This function creates/checks a CSV file in azure storage to act as a cache of previously uploaded files.
     """
     try:
         # load the file cache
@@ -265,7 +267,7 @@ async def update_cache(
     filename: str, file_stream: BinaryIO, container_client: ContainerClient
 ) -> None:
     """
-    Update the file cache with the new file by appending a new row to the cache.
+    Update the file cache (csv) with a new file by adding a new row.
     """
     try:
         # Load the file cache
```
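The docstring updates make the cache mechanism explicit: a CSV blob (uploaded_files_cache.csv, per the create_cache hunk) living in the same storage container. A rough sketch of the check/update cycle with the async Azure SDK; note that the real check_cache takes the file stream, which suggests content-based keying (e.g. a hash), whereas this sketch keys by filename and assumes a single "filename" column for brevity:

```python
from io import BytesIO

import pandas as pd
from azure.storage.blob.aio import ContainerClient

CACHE_BLOB = "uploaded_files_cache.csv"

async def check_cache(filename: str, container_client: ContainerClient) -> bool:
    """Return True if the file is already recorded in the CSV cache."""
    blob_client = container_client.get_blob_client(CACHE_BLOB)
    downloader = await blob_client.download_blob()
    cache = pd.read_csv(BytesIO(await downloader.readall()))
    return filename in set(cache["filename"])  # column name is an assumption

async def update_cache(filename: str, container_client: ContainerClient) -> None:
    """Append a row for the new file and write the CSV back."""
    blob_client = container_client.get_blob_client(CACHE_BLOB)
    downloader = await blob_client.download_blob()
    cache = pd.read_csv(BytesIO(await downloader.readall()))
    cache = pd.concat([cache, pd.DataFrame({"filename": [filename]})])
    await blob_client.upload_blob(cache.to_csv(index=False), overwrite=True)
```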
