 from azure.cosmos import ContainerProxy, exceptions
 from azure.identity import DefaultAzureCredential
 from azure.storage.blob.aio import ContainerClient
-from fastapi import Header, HTTPException
+from fastapi import Header, HTTPException, status
 
 from graphrag_app.logger.load_logger import load_pipeline_logger
 from graphrag_app.utils.azure_clients import AzureClientManager
 
+FILE_UPLOAD_CACHE = "cache/uploaded_files.csv"
+
 
 def get_df(
     table_path: str,
 ) -> pd.DataFrame:
+    """Read a parquet file from Azure Storage and return it as a pandas DataFrame."""
     df = pd.read_parquet(
         table_path,
         storage_options=pandas_storage_options(),
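For context, a minimal usage sketch of `get_df`. The `abfs://` path is a placeholder (the real layout depends on your storage account and index container), and `pandas_storage_options()` is assumed to supply fsspec/adlfs credentials for that account.

```python
# Hypothetical call: read an index output table straight from Blob Storage.
# Container and blob names below are illustrative, not from this PR.
entities = get_df("abfs://my-index-container/output/create_final_entities.parquet")
print(entities.head())
```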
@@ -126,7 +129,10 @@ def get_cosmos_container_store_client() -> ContainerProxy:
             cause=e,
             stack=traceback.format_exc(),
         )
-        raise HTTPException(status_code=500, detail="Error fetching cosmosdb client.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Error fetching cosmosdb client.",
+        )
 
 
 async def get_blob_container_client(name: str) -> ContainerClient:
@@ -144,7 +150,10 @@ async def get_blob_container_client(name: str) -> ContainerClient:
             cause=e,
             stack=traceback.format_exc(),
         )
-        raise HTTPException(status_code=500, detail="Error fetching storage client.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Error fetching storage client.",
+        )
 
 
 def sanitize_name(container_name: str) -> str:
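A minimal sketch of how `get_blob_container_client` might be called; the container name and blob content are illustrative only, not part of this diff.

```python
import asyncio

async def demo() -> None:
    # Raises HTTPException(500) internally if the storage client cannot be built.
    client = await get_blob_container_client("my-data-container")
    # Upload a small test blob to the container (overwrite if it already exists).
    await client.upload_blob("hello.txt", b"hello world", overwrite=True)

asyncio.run(demo())
```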
@@ -191,22 +200,23 @@ def desanitize_name(sanitized_container_name: str) -> str | None:
         return None
     except Exception:
         raise HTTPException(
-            status_code=500, detail="Error retrieving original container name."
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Error retrieving original container name.",
         )
 
 
 async def subscription_key_check(
     Ocp_Apim_Subscription_Key: Annotated[str, Header()],
 ):
     """
-    Verifies if user has passed the Ocp_Apim_Subscription_Key (APIM subscription key) in the request header.
-    If it is not present, an HTTPException with a 400 status code is raised.
-    Note: this check is unnecessary (APIM validates subscription keys automatically), but this will add the key
+    Verify that the user has passed the Ocp_Apim_Subscription_Key (APIM subscription key) in the request header.
+    Note: this check is unnecessary (APIM validates subscription keys automatically), but it effectively adds the key
     as a required parameter in the swagger docs page, enabling users to send requests using the swagger docs "Try it out" feature.
     """
     if not Ocp_Apim_Subscription_Key:
         raise HTTPException(
-            status_code=400, detail="Ocp-Apim-Subscription-Key required"
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Ocp-Apim-Subscription-Key required",
         )
     return Ocp_Apim_Subscription_Key
 
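One way this dependency is commonly wired up (the router and route below are hypothetical, not part of this diff): attaching it at the router level makes the header a required, documented parameter on every route, which is exactly what surfaces it in Swagger's "Try it out" form.

```python
from fastapi import APIRouter, Depends

# Hypothetical router: every route now requires the Ocp-Apim-Subscription-Key header.
router = APIRouter(dependencies=[Depends(subscription_key_check)])

@router.get("/health")
async def health() -> dict:
    return {"status": "ok"}
```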
@@ -216,7 +226,7 @@ async def create_cache(container_client: ContainerClient) -> None:
     Create a file cache (csv) to track uploaded files.
     """
     try:
-        cache_blob_client = container_client.get_blob_client("uploaded_files_cache.csv")
+        cache_blob_client = container_client.get_blob_client(FILE_UPLOAD_CACHE)
         if not await cache_blob_client.exists():
             # create the empty file cache csv
             headers = [["Filename", "Hash"]]
@@ -231,20 +241,20 @@ async def create_cache(container_client: ContainerClient) -> None:
         os.remove("uploaded_files_cache.csv")
     except Exception:
         raise HTTPException(
-            status_code=500,
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Error creating file cache in Azure Blob Storage.",
         )
 
 
 async def check_cache(file_stream: BinaryIO, container_client: ContainerClient) -> bool:
     """
-    Check a cache file to determine if a file has previously been uploaded.
+    Check a file cache (csv) to determine if a file has previously been uploaded.
 
     Note: This function creates/checks a CSV file in azure storage to act as a cache of previously uploaded files.
     """
     try:
         # load the file cache
-        cache_blob_client = container_client.get_blob_client("uploaded_files_cache.csv")
+        cache_blob_client = container_client.get_blob_client(FILE_UPLOAD_CACHE)
         cache_download_stream = await cache_blob_client.download_blob()
         cache_bytes = await cache_download_stream.readall()
         cache_content = StringIO(cache_bytes.decode("utf-8"))
@@ -258,7 +268,7 @@ async def check_cache(file_stream: BinaryIO, container_client: ContainerClient)
         return False
     except Exception:
         raise HTTPException(
-            status_code=500,
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Error checking file cache in Azure Blob Storage.",
         )
 
@@ -267,11 +277,11 @@ async def update_cache(
     filename: str, file_stream: BinaryIO, container_client: ContainerClient
 ) -> None:
     """
-    Update the file cache (csv) with a new file by adding a new row.
+    Update a file cache (csv) with a new file by adding a new row.
     """
     try:
         # Load the file cache
-        cache_blob_client = container_client.get_blob_client("uploaded_files_cache.csv")
+        cache_blob_client = container_client.get_blob_client(FILE_UPLOAD_CACHE)
         cache_download_stream = await cache_blob_client.download_blob()
         cache_bytes = await cache_download_stream.readall()
         cache_content = StringIO(cache_bytes.decode("utf-8"))
@@ -297,6 +307,6 @@ async def update_cache(
         )
     except Exception:
         raise HTTPException(
-            status_code=500,
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Error updating file cache in Azure Blob Storage.",
         )