1515from azure .monitor .opentelemetry import configure_azure_monitor
1616from azure .search .documents .aio import SearchClient
1717from azure .search .documents .indexes .aio import SearchIndexClient
18- from azure .storage .blob .aio import BlobServiceClient
18+ from azure .storage .blob .aio import ContainerClient
19+ from azure .storage .blob .aio import StorageStreamDownloader as BlobDownloader
20+ from azure .storage .filedatalake .aio import FileSystemClient
21+ from azure .storage .filedatalake .aio import StorageStreamDownloader as DatalakeDownloader
1922from openai import AsyncAzureOpenAI , AsyncOpenAI
2023from opentelemetry .instrumentation .aiohttp_client import AioHttpClientInstrumentor
2124from opentelemetry .instrumentation .asgi import OpenTelemetryMiddleware
4952 CONFIG_CHAT_APPROACH ,
5053 CONFIG_CHAT_VISION_APPROACH ,
5154 CONFIG_GPT4V_DEPLOYED ,
55+ CONFIG_INGESTER ,
5256 CONFIG_OPENAI_CLIENT ,
5357 CONFIG_SEARCH_CLIENT ,
5458 CONFIG_SEMANTIC_RANKER_DEPLOYED ,
59+ CONFIG_USER_BLOB_CONTAINER_CLIENT ,
60+ CONFIG_USER_UPLOAD_ENABLED ,
5561 CONFIG_VECTOR_SEARCH_ENABLED ,
5662)
5763from core .authentication import AuthenticationHelper
5864from decorators import authenticated , authenticated_path
5965from error import error_dict , error_response
66+ from prepdocs import (
67+ clean_key_if_exists ,
68+ setup_embeddings_service ,
69+ setup_file_processors ,
70+ setup_search_info ,
71+ )
72+ from prepdocslib .filestrategy import UploadUserFileStrategy
73+ from prepdocslib .listfilestrategy import File
6074
6175bp = Blueprint ("routes" , __name__ , static_folder = "static" )
6276# Fix Windows registry issue with mimetypes
@@ -88,7 +102,7 @@ async def assets(path):
88102
89103@bp .route ("/content/<path>" )
90104@authenticated_path
91- async def content_file (path : str ):
105+ async def content_file (path : str , auth_claims : Dict [ str , Any ] ):
92106 """
93107 Serve content files from blob storage from within the app to keep the example self-contained.
94108 *** NOTE *** if you are using app services authentication, this route will return unauthorized to all users that are not logged in
@@ -102,12 +116,24 @@ async def content_file(path: str):
102116 path_parts = path .rsplit ("#page=" , 1 )
103117 path = path_parts [0 ]
104118 logging .info ("Opening file %s" , path )
105- blob_container_client = current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ]
119+ blob_container_client : ContainerClient = current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ]
120+ blob : Union [BlobDownloader , DatalakeDownloader ]
106121 try :
107122 blob = await blob_container_client .get_blob_client (path ).download_blob ()
108123 except ResourceNotFoundError :
109- logging .exception ("Path not found: %s" , path )
110- abort (404 )
124+ logging .info ("Path not found in general Blob container: %s" , path )
125+ if current_app .config [CONFIG_USER_UPLOAD_ENABLED ]:
126+ try :
127+ user_oid = auth_claims ["oid" ]
128+ user_blob_container_client = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
129+ user_directory_client : FileSystemClient = user_blob_container_client .get_directory_client (user_oid )
130+ file_client = user_directory_client .get_file_client (path )
131+ blob = await file_client .download_file ()
132+ except ResourceNotFoundError :
133+ logging .exception ("Path not found in DataLake: %s" , path )
134+ abort (404 )
135+ else :
136+ abort (404 )
111137 if not blob .properties or not blob .properties .has_key ("content_settings" ):
112138 abort (404 )
113139 mime_type = blob .properties ["content_settings" ]["content_type" ]
@@ -205,15 +231,78 @@ def config():
205231 "showGPT4VOptions" : current_app .config [CONFIG_GPT4V_DEPLOYED ],
206232 "showSemanticRankerOption" : current_app .config [CONFIG_SEMANTIC_RANKER_DEPLOYED ],
207233 "showVectorOption" : current_app .config [CONFIG_VECTOR_SEARCH_ENABLED ],
234+ "showUserUpload" : current_app .config [CONFIG_USER_UPLOAD_ENABLED ],
208235 }
209236 )
210237
211238
239+ @bp .post ("/upload" )
240+ @authenticated
241+ async def upload (auth_claims : dict [str , Any ]):
242+ request_files = await request .files
243+ if "file" not in request_files :
244+ # If no files were included in the request, return an error response
245+ return jsonify ({"message" : "No file part in the request" , "status" : "failed" }), 400
246+
247+ user_oid = auth_claims ["oid" ]
248+ file = request_files .getlist ("file" )[0 ]
249+ user_blob_container_client : FileSystemClient = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
250+ user_directory_client = user_blob_container_client .get_directory_client (user_oid )
251+ try :
252+ await user_directory_client .get_directory_properties ()
253+ except ResourceNotFoundError :
254+ current_app .logger .info ("Creating directory for user %s" , user_oid )
255+ await user_directory_client .create_directory ()
256+ await user_directory_client .set_access_control (owner = user_oid )
257+ file_client = user_directory_client .get_file_client (file .filename )
258+ file_io = file
259+ file_io .name = file .filename
260+ file_io = io .BufferedReader (file_io )
261+ await file_client .upload_data (file_io , overwrite = True , metadata = {"UploadedBy" : user_oid })
262+ file_io .seek (0 )
263+ ingester = current_app .config [CONFIG_INGESTER ]
264+ await ingester .add_file (File (content = file_io , acls = {"oids" : [user_oid ]}))
265+ return jsonify ({"message" : "File uploaded successfully" }), 200
266+
267+
268+ @bp .post ("/delete_uploaded" )
269+ @authenticated
270+ async def delete_uploaded (auth_claims : dict [str , Any ]):
271+ request_json = await request .get_json ()
272+ filename = request_json .get ("filename" )
273+ user_oid = auth_claims ["oid" ]
274+ user_blob_container_client : FileSystemClient = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
275+ user_directory_client = user_blob_container_client .get_directory_client (user_oid )
276+ file_client = user_directory_client .get_file_client (filename )
277+ await file_client .delete_file ()
278+ ingester = current_app .config [CONFIG_INGESTER ]
279+ await ingester .remove_file (filename , user_oid )
280+ return jsonify ({"message" : f"File { filename } deleted successfully" }), 200
281+
282+
283+ @bp .get ("/list_uploaded" )
284+ @authenticated
285+ async def list_uploaded (auth_claims : dict [str , Any ]):
286+ user_oid = auth_claims ["oid" ]
287+ user_blob_container_client : FileSystemClient = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
288+ files = []
289+ try :
290+ all_paths = user_blob_container_client .get_paths (path = user_oid )
291+ async for path in all_paths :
292+ files .append (path .name .split ("/" , 1 )[1 ])
293+ except ResourceNotFoundError as error :
294+ if error .status_code != 404 :
295+ current_app .logger .exception ("Error listing uploaded files" , error )
296+ return jsonify (files ), 200
297+
298+
212299@bp .before_app_serving
213300async def setup_clients ():
214301 # Replace these with your own values, either in environment variables or directly here
215302 AZURE_STORAGE_ACCOUNT = os .environ ["AZURE_STORAGE_ACCOUNT" ]
216303 AZURE_STORAGE_CONTAINER = os .environ ["AZURE_STORAGE_CONTAINER" ]
304+ AZURE_USERSTORAGE_ACCOUNT = os .environ .get ("AZURE_USERSTORAGE_ACCOUNT" )
305+ AZURE_USERSTORAGE_CONTAINER = os .environ .get ("AZURE_USERSTORAGE_CONTAINER" )
217306 AZURE_SEARCH_SERVICE = os .environ ["AZURE_SEARCH_SERVICE" ]
218307 AZURE_SEARCH_INDEX = os .environ ["AZURE_SEARCH_INDEX" ]
219308 AZURE_SEARCH_SECRET_NAME = os .getenv ("AZURE_SEARCH_SECRET_NAME" )
@@ -252,6 +341,7 @@ async def setup_clients():
252341 AZURE_SEARCH_SEMANTIC_RANKER = os .getenv ("AZURE_SEARCH_SEMANTIC_RANKER" , "free" ).lower ()
253342
254343 USE_GPT4V = os .getenv ("USE_GPT4V" , "" ).lower () == "true"
344+ USE_USER_UPLOAD = os .getenv ("USE_USER_UPLOAD" , "" ).lower () == "true"
255345
256346 # Use the current user identity to authenticate with Azure OpenAI, AI Search and Blob Storage (no secrets needed,
257347 # just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
@@ -262,11 +352,12 @@ async def setup_clients():
262352 # Fetch any necessary secrets from Key Vault
263353 search_key = None
264354 if AZURE_KEY_VAULT_NAME :
265- key_vault_client = SecretClient (
355+ async with SecretClient (
266356 vault_url = f"https://{ AZURE_KEY_VAULT_NAME } .vault.azure.net" , credential = azure_credential
267- )
268- search_key = AZURE_SEARCH_SECRET_NAME and (await key_vault_client .get_secret (AZURE_SEARCH_SECRET_NAME )).value
269- await key_vault_client .close ()
357+ ) as key_vault_client :
358+ search_key = (
359+ AZURE_SEARCH_SECRET_NAME and (await key_vault_client .get_secret (AZURE_SEARCH_SECRET_NAME )).value
360+ )
270361
271362 # Set up clients for AI Search and Storage
272363 search_credential : Union [AsyncTokenCredential , AzureKeyCredential ] = (
@@ -277,19 +368,22 @@ async def setup_clients():
277368 index_name = AZURE_SEARCH_INDEX ,
278369 credential = search_credential ,
279370 )
280- search_index_client = SearchIndexClient (
281- endpoint = f"https://{ AZURE_SEARCH_SERVICE } .search.windows.net" ,
282- credential = search_credential ,
283- )
284371
285- blob_client = BlobServiceClient (
286- account_url = f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" , credential = azure_credential
372+ blob_container_client = ContainerClient (
373+ f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" , AZURE_STORAGE_CONTAINER , credential = azure_credential
287374 )
288- blob_container_client = blob_client .get_container_client (AZURE_STORAGE_CONTAINER )
289375
290376 # Set up authentication helper
377+ search_index = None
378+ if AZURE_USE_AUTHENTICATION :
379+ search_index_client = SearchIndexClient (
380+ endpoint = f"https://{ AZURE_SEARCH_SERVICE } .search.windows.net" ,
381+ credential = search_credential ,
382+ )
383+ search_index = await search_index_client .get_index (AZURE_SEARCH_INDEX )
384+ await search_index_client .close ()
291385 auth_helper = AuthenticationHelper (
292- search_index = ( await search_index_client . get_index ( AZURE_SEARCH_INDEX )) if AZURE_USE_AUTHENTICATION else None ,
386+ search_index = search_index ,
293387 use_authentication = AZURE_USE_AUTHENTICATION ,
294388 server_app_id = AZURE_SERVER_APP_ID ,
295389 server_app_secret = AZURE_SERVER_APP_SECRET ,
@@ -298,6 +392,45 @@ async def setup_clients():
298392 require_access_control = AZURE_ENFORCE_ACCESS_CONTROL ,
299393 )
300394
395+ if USE_USER_UPLOAD :
396+ current_app .logger .info ("USE_USER_UPLOAD is true, setting up user upload feature" )
397+ user_blob_container_client = FileSystemClient (
398+ f"https://{ AZURE_USERSTORAGE_ACCOUNT } .dfs.core.windows.net" ,
399+ AZURE_USERSTORAGE_CONTAINER ,
400+ credential = azure_credential ,
401+ )
402+ current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ] = user_blob_container_client
403+
404+ # Set up ingester
405+ file_processors = setup_file_processors (
406+ azure_credential = azure_credential ,
407+ document_intelligence_service = os .getenv ("AZURE_DOCUMENTINTELLIGENCE_SERVICE" ),
408+ local_pdf_parser = os .getenv ("USE_LOCAL_PDF_PARSER" , "" ).lower () == "true" ,
409+ local_html_parser = os .getenv ("USE_LOCAL_HTML_PARSER" , "" ).lower () == "true" ,
410+ search_images = USE_GPT4V ,
411+ )
412+ search_info = await setup_search_info (
413+ search_service = AZURE_SEARCH_SERVICE ,
414+ index_name = AZURE_SEARCH_INDEX ,
415+ azure_credential = azure_credential ,
416+ search_key = clean_key_if_exists (search_key ),
417+ )
418+ text_embeddings_service = setup_embeddings_service (
419+ azure_credential = azure_credential ,
420+ openai_host = OPENAI_HOST ,
421+ openai_model_name = OPENAI_EMB_MODEL ,
422+ openai_service = AZURE_OPENAI_SERVICE ,
423+ openai_deployment = AZURE_OPENAI_EMB_DEPLOYMENT ,
424+ openai_dimensions = OPENAI_EMB_DIMENSIONS ,
425+ openai_key = clean_key_if_exists (OPENAI_API_KEY ),
426+ openai_org = OPENAI_ORGANIZATION ,
427+ disable_vectors = os .getenv ("USE_VECTORS" , "" ).lower () == "false" ,
428+ )
429+ ingester = UploadUserFileStrategy (
430+ search_info = search_info , embeddings = text_embeddings_service , file_processors = file_processors
431+ )
432+ current_app .config [CONFIG_INGESTER ] = ingester
433+
301434 # Used by the OpenAI SDK
302435 openai_client : AsyncOpenAI
303436
@@ -335,6 +468,7 @@ async def setup_clients():
335468 current_app .config [CONFIG_GPT4V_DEPLOYED ] = bool (USE_GPT4V )
336469 current_app .config [CONFIG_SEMANTIC_RANKER_DEPLOYED ] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
337470 current_app .config [CONFIG_VECTOR_SEARCH_ENABLED ] = os .getenv ("USE_VECTORS" , "" ).lower () != "false"
471+ current_app .config [CONFIG_USER_UPLOAD_ENABLED ] = bool (USE_USER_UPLOAD )
338472
339473 # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
340474 # or some derivative, here we include several for exploration purposes
@@ -353,7 +487,24 @@ async def setup_clients():
353487 query_speller = AZURE_SEARCH_QUERY_SPELLER ,
354488 )
355489
490+ current_app .config [CONFIG_CHAT_APPROACH ] = ChatReadRetrieveReadApproach (
491+ search_client = search_client ,
492+ openai_client = openai_client ,
493+ auth_helper = auth_helper ,
494+ chatgpt_model = OPENAI_CHATGPT_MODEL ,
495+ chatgpt_deployment = AZURE_OPENAI_CHATGPT_DEPLOYMENT ,
496+ embedding_model = OPENAI_EMB_MODEL ,
497+ embedding_deployment = AZURE_OPENAI_EMB_DEPLOYMENT ,
498+ embedding_dimensions = OPENAI_EMB_DIMENSIONS ,
499+ sourcepage_field = KB_FIELDS_SOURCEPAGE ,
500+ content_field = KB_FIELDS_CONTENT ,
501+ query_language = AZURE_SEARCH_QUERY_LANGUAGE ,
502+ query_speller = AZURE_SEARCH_QUERY_SPELLER ,
503+ )
504+
356505 if USE_GPT4V :
506+ current_app .logger .info ("USE_GPT4V is true, setting up GPT4V approach" )
507+
357508 token_provider = get_bearer_token_provider (azure_credential , "https://cognitiveservices.azure.com/.default" )
358509
359510 current_app .config [CONFIG_ASK_VISION_APPROACH ] = RetrieveThenReadVisionApproach (
@@ -392,26 +543,13 @@ async def setup_clients():
392543 query_speller = AZURE_SEARCH_QUERY_SPELLER ,
393544 )
394545
395- current_app .config [CONFIG_CHAT_APPROACH ] = ChatReadRetrieveReadApproach (
396- search_client = search_client ,
397- openai_client = openai_client ,
398- auth_helper = auth_helper ,
399- chatgpt_model = OPENAI_CHATGPT_MODEL ,
400- chatgpt_deployment = AZURE_OPENAI_CHATGPT_DEPLOYMENT ,
401- embedding_model = OPENAI_EMB_MODEL ,
402- embedding_deployment = AZURE_OPENAI_EMB_DEPLOYMENT ,
403- embedding_dimensions = OPENAI_EMB_DIMENSIONS ,
404- sourcepage_field = KB_FIELDS_SOURCEPAGE ,
405- content_field = KB_FIELDS_CONTENT ,
406- query_language = AZURE_SEARCH_QUERY_LANGUAGE ,
407- query_speller = AZURE_SEARCH_QUERY_SPELLER ,
408- )
409-
410546
411547@bp .after_app_serving
412548async def close_clients ():
413549 await current_app .config [CONFIG_SEARCH_CLIENT ].close ()
414550 await current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ].close ()
551+ if current_app .config .get (CONFIG_USER_BLOB_CONTAINER_CLIENT ):
552+ await current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ].close ()
415553
416554
417555def create_app ():
0 commit comments