33
44import asyncio
55import re
6+ import traceback
67from math import ceil
78from typing import List
89
9- from azure .storage .blob import ContainerClient
10+ from azure .storage .blob . aio import ContainerClient
1011from fastapi import (
1112 APIRouter ,
13+ Depends ,
1214 HTTPException ,
1315 UploadFile ,
1416)
1517
16- from src .api .azure_clients import AzureClientManager
17- from src .api .common import (
18- delete_blob_container ,
19- delete_cosmos_container_item ,
20- sanitize_name ,
21- validate_blob_container_name ,
22- )
23- from src .logger import LoggerSingleton
24- from src .models import (
18+ from graphrag_app .logger .load_logger import load_pipeline_logger
19+ from graphrag_app .typing .models import (
2520 BaseResponse ,
2621 StorageNameList ,
2722)
23+ from graphrag_app .utils .common import (
24+ delete_cosmos_container_item_if_exist ,
25+ delete_storage_container_if_exist ,
26+ get_blob_container_client ,
27+ get_cosmos_container_store_client ,
28+ sanitize_name ,
29+ )
2830
2931data_route = APIRouter (
3032 prefix = "/data" ,
3436
3537@data_route .get (
3638 "" ,
37- summary = "Get all data storage containers." ,
39+ summary = "Get list of data containers." ,
3840 response_model = StorageNameList ,
3941 responses = {200 : {"model" : StorageNameList }},
4042)
41- async def get_all_data_storage_containers ():
43+ async def get_all_data_containers ():
4244 """
43- Retrieve a list of all data storage containers.
45+ Retrieve a list of all data containers.
4446 """
45- azure_client_manager = AzureClientManager ()
4647 items = []
4748 try :
48- container_store_client = azure_client_manager .get_cosmos_container_client (
49- database = "graphrag" , container = "container-store"
50- )
49+ container_store_client = get_cosmos_container_store_client ()
5150 for item in container_store_client .read_all_items ():
5251 if item ["type" ] == "data" :
5352 items .append (item ["human_readable_name" ])
54- except Exception :
55- reporter = LoggerSingleton ().get_instance ()
56- reporter .on_error ("Error getting list of blob containers." )
53+ except Exception as e :
54+ reporter = load_pipeline_logger ()
55+ reporter .error (
56+ message = "Error getting list of blob containers." ,
57+ cause = e ,
58+ stack = traceback .format_exc (),
59+ )
5760 raise HTTPException (
5861 status_code = 500 , detail = "Error getting list of blob containers."
5962 )
@@ -112,10 +115,13 @@ def __exit__(self, *args):
112115 responses = {200 : {"model" : BaseResponse }},
113116)
114117async def upload_files (
115- files : List [UploadFile ], storage_name : str , overwrite : bool = True
118+ files : List [UploadFile ],
119+ container_name : str ,
120+ sanitized_container_name : str = Depends (sanitize_name ),
121+ overwrite : bool = True ,
116122):
117123 """
118- Create a data storage container in Azure and upload files to it.
124+ Create a Azure Storage container and upload files to it.
119125
120126 Args:
121127 files (List[UploadFile]): A list of files to be uploaded.
@@ -128,80 +134,73 @@ async def upload_files(
128134 Raises:
129135 HTTPException: If the container name is invalid or if any error occurs during the upload process.
130136 """
131- sanitized_storage_name = sanitize_name (storage_name )
132- # ensure container name follows Azure Blob Storage naming conventions
133- try :
134- validate_blob_container_name (sanitized_storage_name )
135- except ValueError :
136- raise HTTPException (
137- status_code = 500 ,
138- detail = f"Invalid blob container name: '{ storage_name } '. Please try a different name." ,
139- )
140137 try :
141- azure_client_manager = AzureClientManager ()
142- blob_service_client = azure_client_manager .get_blob_service_client_async ()
143- container_client = blob_service_client .get_container_client (
144- sanitized_storage_name
145- )
146- if not await container_client .exists ():
147- await container_client .create_container ()
148-
149138 # clean files - remove illegal XML characters
150139 files = [UploadFile (Cleaner (f .file ), filename = f .filename ) for f in files ]
151140
152141 # upload files in batches of 1000 to avoid exceeding Azure Storage API limits
142+ blob_container_client = await get_blob_container_client (
143+ sanitized_container_name
144+ )
153145 batch_size = 1000
154- batches = ceil (len (files ) / batch_size )
155- for i in range (batches ):
146+ num_batches = ceil (len (files ) / batch_size )
147+ for i in range (num_batches ):
156148 batch_files = files [i * batch_size : (i + 1 ) * batch_size ]
157149 tasks = [
158- upload_file_async (file , container_client , overwrite )
150+ upload_file_async (file , blob_container_client , overwrite )
159151 for file in batch_files
160152 ]
161153 await asyncio .gather (* tasks )
162- # update container-store in cosmosDB since upload process was successful
163- container_store_client = azure_client_manager .get_cosmos_container_client (
164- database = "graphrag" , container = "container-store"
165- )
166- container_store_client .upsert_item ({
167- "id" : sanitized_storage_name ,
168- "human_readable_name" : storage_name ,
154+
155+ # update container-store entry in cosmosDB once upload process is successful
156+ cosmos_container_store_client = get_cosmos_container_store_client ()
157+ cosmos_container_store_client .upsert_item ({
158+ "id" : sanitized_container_name ,
159+ "human_readable_name" : container_name ,
169160 "type" : "data" ,
170161 })
171162 return BaseResponse (status = "File upload successful." )
172- except Exception :
173- logger = LoggerSingleton ().get_instance ()
174- logger .on_error ("Error uploading files." , details = {"files" : files })
163+ except Exception as e :
164+ logger = load_pipeline_logger ()
165+ logger .error (
166+ message = "Error uploading files." ,
167+ cause = e ,
168+ stack = traceback .format_exc (),
169+ details = {"files" : [f .filename for f in files ]},
170+ )
175171 raise HTTPException (
176172 status_code = 500 ,
177- detail = f"Error uploading files to container '{ storage_name } '." ,
173+ detail = f"Error uploading files to container '{ container_name } '." ,
178174 )
179175
180176
181177@data_route .delete (
182- "/{storage_name }" ,
178+ "/{container_name }" ,
183179 summary = "Delete a data storage container" ,
184180 response_model = BaseResponse ,
185181 responses = {200 : {"model" : BaseResponse }},
186182)
187- async def delete_files (storage_name : str ):
183+ async def delete_files (
184+ container_name : str , sanitized_container_name : str = Depends (sanitize_name )
185+ ):
188186 """
189187 Delete a specified data storage container.
190188 """
191- # azure_client_manager = AzureClientManager()
192- sanitized_storage_name = sanitize_name (storage_name )
193189 try :
194- # delete container in Azure Storage
195- delete_blob_container (sanitized_storage_name )
196- # delete entry from container-store in cosmosDB
197- delete_cosmos_container_item ("container-store" , sanitized_storage_name )
198- except Exception :
199- logger = LoggerSingleton ().get_instance ()
200- logger .on_error (
201- f"Error deleting container { storage_name } ." ,
202- details = {"Container" : storage_name },
190+ delete_storage_container_if_exist (sanitized_container_name )
191+ delete_cosmos_container_item_if_exist (
192+ "container-store" , sanitized_container_name
193+ )
194+ except Exception as e :
195+ logger = load_pipeline_logger ()
196+ logger .error (
197+ message = f"Error deleting container { container_name } ." ,
198+ cause = e ,
199+ stack = traceback .format_exc (),
200+ details = {"Container" : container_name },
203201 )
204202 raise HTTPException (
205- status_code = 500 , detail = f"Error deleting container '{ storage_name } '."
203+ status_code = 500 ,
204+ detail = f"Error deleting container '{ container_name } '." ,
206205 )
207206 return BaseResponse (status = "Success" )
0 commit comments