15
15
from azure .monitor .opentelemetry import configure_azure_monitor
16
16
from azure .search .documents .aio import SearchClient
17
17
from azure .search .documents .indexes .aio import SearchIndexClient
18
- from azure .storage .blob .aio import BlobServiceClient
18
+ from azure .storage .blob .aio import ContainerClient
19
+ from azure .storage .blob .aio import StorageStreamDownloader as BlobDownloader
20
+ from azure .storage .filedatalake .aio import FileSystemClient
21
+ from azure .storage .filedatalake .aio import StorageStreamDownloader as DatalakeDownloader
19
22
from openai import AsyncAzureOpenAI , AsyncOpenAI
20
23
from opentelemetry .instrumentation .aiohttp_client import AioHttpClientInstrumentor
21
24
from opentelemetry .instrumentation .asgi import OpenTelemetryMiddleware
49
52
CONFIG_CHAT_APPROACH ,
50
53
CONFIG_CHAT_VISION_APPROACH ,
51
54
CONFIG_GPT4V_DEPLOYED ,
55
+ CONFIG_INGESTER ,
52
56
CONFIG_OPENAI_CLIENT ,
53
57
CONFIG_SEARCH_CLIENT ,
54
58
CONFIG_SEMANTIC_RANKER_DEPLOYED ,
59
+ CONFIG_USER_BLOB_CONTAINER_CLIENT ,
60
+ CONFIG_USER_UPLOAD_ENABLED ,
55
61
CONFIG_VECTOR_SEARCH_ENABLED ,
56
62
)
57
63
from core .authentication import AuthenticationHelper
58
64
from decorators import authenticated , authenticated_path
59
65
from error import error_dict , error_response
66
+ from prepdocs import (
67
+ clean_key_if_exists ,
68
+ setup_embeddings_service ,
69
+ setup_file_processors ,
70
+ setup_search_info ,
71
+ )
72
+ from prepdocslib .filestrategy import UploadUserFileStrategy
73
+ from prepdocslib .listfilestrategy import File
60
74
61
75
bp = Blueprint ("routes" , __name__ , static_folder = "static" )
62
76
# Fix Windows registry issue with mimetypes
@@ -88,7 +102,7 @@ async def assets(path):
88
102
89
103
@bp .route ("/content/<path>" )
90
104
@authenticated_path
91
- async def content_file (path : str ):
105
+ async def content_file (path : str , auth_claims : Dict [ str , Any ] ):
92
106
"""
93
107
Serve content files from blob storage from within the app to keep the example self-contained.
94
108
*** NOTE *** if you are using app services authentication, this route will return unauthorized to all users that are not logged in
@@ -102,12 +116,24 @@ async def content_file(path: str):
102
116
path_parts = path .rsplit ("#page=" , 1 )
103
117
path = path_parts [0 ]
104
118
logging .info ("Opening file %s" , path )
105
- blob_container_client = current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ]
119
+ blob_container_client : ContainerClient = current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ]
120
+ blob : Union [BlobDownloader , DatalakeDownloader ]
106
121
try :
107
122
blob = await blob_container_client .get_blob_client (path ).download_blob ()
108
123
except ResourceNotFoundError :
109
- logging .exception ("Path not found: %s" , path )
110
- abort (404 )
124
+ logging .info ("Path not found in general Blob container: %s" , path )
125
+ if current_app .config [CONFIG_USER_UPLOAD_ENABLED ]:
126
+ try :
127
+ user_oid = auth_claims ["oid" ]
128
+ user_blob_container_client = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
129
+ user_directory_client : FileSystemClient = user_blob_container_client .get_directory_client (user_oid )
130
+ file_client = user_directory_client .get_file_client (path )
131
+ blob = await file_client .download_file ()
132
+ except ResourceNotFoundError :
133
+ logging .exception ("Path not found in DataLake: %s" , path )
134
+ abort (404 )
135
+ else :
136
+ abort (404 )
111
137
if not blob .properties or not blob .properties .has_key ("content_settings" ):
112
138
abort (404 )
113
139
mime_type = blob .properties ["content_settings" ]["content_type" ]
@@ -205,15 +231,78 @@ def config():
205
231
"showGPT4VOptions" : current_app .config [CONFIG_GPT4V_DEPLOYED ],
206
232
"showSemanticRankerOption" : current_app .config [CONFIG_SEMANTIC_RANKER_DEPLOYED ],
207
233
"showVectorOption" : current_app .config [CONFIG_VECTOR_SEARCH_ENABLED ],
234
+ "showUserUpload" : current_app .config [CONFIG_USER_UPLOAD_ENABLED ],
208
235
}
209
236
)
210
237
211
238
239
+ @bp .post ("/upload" )
240
+ @authenticated
241
+ async def upload (auth_claims : dict [str , Any ]):
242
+ request_files = await request .files
243
+ if "file" not in request_files :
244
+ # If no files were included in the request, return an error response
245
+ return jsonify ({"message" : "No file part in the request" , "status" : "failed" }), 400
246
+
247
+ user_oid = auth_claims ["oid" ]
248
+ file = request_files .getlist ("file" )[0 ]
249
+ user_blob_container_client : FileSystemClient = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
250
+ user_directory_client = user_blob_container_client .get_directory_client (user_oid )
251
+ try :
252
+ await user_directory_client .get_directory_properties ()
253
+ except ResourceNotFoundError :
254
+ current_app .logger .info ("Creating directory for user %s" , user_oid )
255
+ await user_directory_client .create_directory ()
256
+ await user_directory_client .set_access_control (owner = user_oid )
257
+ file_client = user_directory_client .get_file_client (file .filename )
258
+ file_io = file
259
+ file_io .name = file .filename
260
+ file_io = io .BufferedReader (file_io )
261
+ await file_client .upload_data (file_io , overwrite = True , metadata = {"UploadedBy" : user_oid })
262
+ file_io .seek (0 )
263
+ ingester = current_app .config [CONFIG_INGESTER ]
264
+ await ingester .add_file (File (content = file_io , acls = {"oids" : [user_oid ]}))
265
+ return jsonify ({"message" : "File uploaded successfully" }), 200
266
+
267
+
268
+ @bp .post ("/delete_uploaded" )
269
+ @authenticated
270
+ async def delete_uploaded (auth_claims : dict [str , Any ]):
271
+ request_json = await request .get_json ()
272
+ filename = request_json .get ("filename" )
273
+ user_oid = auth_claims ["oid" ]
274
+ user_blob_container_client : FileSystemClient = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
275
+ user_directory_client = user_blob_container_client .get_directory_client (user_oid )
276
+ file_client = user_directory_client .get_file_client (filename )
277
+ await file_client .delete_file ()
278
+ ingester = current_app .config [CONFIG_INGESTER ]
279
+ await ingester .remove_file (filename , user_oid )
280
+ return jsonify ({"message" : f"File { filename } deleted successfully" }), 200
281
+
282
+
283
+ @bp .get ("/list_uploaded" )
284
+ @authenticated
285
+ async def list_uploaded (auth_claims : dict [str , Any ]):
286
+ user_oid = auth_claims ["oid" ]
287
+ user_blob_container_client : FileSystemClient = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
288
+ files = []
289
+ try :
290
+ all_paths = user_blob_container_client .get_paths (path = user_oid )
291
+ async for path in all_paths :
292
+ files .append (path .name .split ("/" , 1 )[1 ])
293
+ except ResourceNotFoundError as error :
294
+ if error .status_code != 404 :
295
+ current_app .logger .exception ("Error listing uploaded files" , error )
296
+ return jsonify (files ), 200
297
+
298
+
212
299
@bp .before_app_serving
213
300
async def setup_clients ():
214
301
# Replace these with your own values, either in environment variables or directly here
215
302
AZURE_STORAGE_ACCOUNT = os .environ ["AZURE_STORAGE_ACCOUNT" ]
216
303
AZURE_STORAGE_CONTAINER = os .environ ["AZURE_STORAGE_CONTAINER" ]
304
+ AZURE_USERSTORAGE_ACCOUNT = os .environ .get ("AZURE_USERSTORAGE_ACCOUNT" )
305
+ AZURE_USERSTORAGE_CONTAINER = os .environ .get ("AZURE_USERSTORAGE_CONTAINER" )
217
306
AZURE_SEARCH_SERVICE = os .environ ["AZURE_SEARCH_SERVICE" ]
218
307
AZURE_SEARCH_INDEX = os .environ ["AZURE_SEARCH_INDEX" ]
219
308
AZURE_SEARCH_SECRET_NAME = os .getenv ("AZURE_SEARCH_SECRET_NAME" )
@@ -252,6 +341,7 @@ async def setup_clients():
252
341
AZURE_SEARCH_SEMANTIC_RANKER = os .getenv ("AZURE_SEARCH_SEMANTIC_RANKER" , "free" ).lower ()
253
342
254
343
USE_GPT4V = os .getenv ("USE_GPT4V" , "" ).lower () == "true"
344
+ USE_USER_UPLOAD = os .getenv ("USE_USER_UPLOAD" , "" ).lower () == "true"
255
345
256
346
# Use the current user identity to authenticate with Azure OpenAI, AI Search and Blob Storage (no secrets needed,
257
347
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
@@ -262,11 +352,12 @@ async def setup_clients():
262
352
# Fetch any necessary secrets from Key Vault
263
353
search_key = None
264
354
if AZURE_KEY_VAULT_NAME :
265
- key_vault_client = SecretClient (
355
+ async with SecretClient (
266
356
vault_url = f"https://{ AZURE_KEY_VAULT_NAME } .vault.azure.net" , credential = azure_credential
267
- )
268
- search_key = AZURE_SEARCH_SECRET_NAME and (await key_vault_client .get_secret (AZURE_SEARCH_SECRET_NAME )).value
269
- await key_vault_client .close ()
357
+ ) as key_vault_client :
358
+ search_key = (
359
+ AZURE_SEARCH_SECRET_NAME and (await key_vault_client .get_secret (AZURE_SEARCH_SECRET_NAME )).value
360
+ )
270
361
271
362
# Set up clients for AI Search and Storage
272
363
search_credential : Union [AsyncTokenCredential , AzureKeyCredential ] = (
@@ -277,19 +368,22 @@ async def setup_clients():
277
368
index_name = AZURE_SEARCH_INDEX ,
278
369
credential = search_credential ,
279
370
)
280
- search_index_client = SearchIndexClient (
281
- endpoint = f"https://{ AZURE_SEARCH_SERVICE } .search.windows.net" ,
282
- credential = search_credential ,
283
- )
284
371
285
- blob_client = BlobServiceClient (
286
- account_url = f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" , credential = azure_credential
372
+ blob_container_client = ContainerClient (
373
+ f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" , AZURE_STORAGE_CONTAINER , credential = azure_credential
287
374
)
288
- blob_container_client = blob_client .get_container_client (AZURE_STORAGE_CONTAINER )
289
375
290
376
# Set up authentication helper
377
+ search_index = None
378
+ if AZURE_USE_AUTHENTICATION :
379
+ search_index_client = SearchIndexClient (
380
+ endpoint = f"https://{ AZURE_SEARCH_SERVICE } .search.windows.net" ,
381
+ credential = search_credential ,
382
+ )
383
+ search_index = await search_index_client .get_index (AZURE_SEARCH_INDEX )
384
+ await search_index_client .close ()
291
385
auth_helper = AuthenticationHelper (
292
- search_index = ( await search_index_client . get_index ( AZURE_SEARCH_INDEX )) if AZURE_USE_AUTHENTICATION else None ,
386
+ search_index = search_index ,
293
387
use_authentication = AZURE_USE_AUTHENTICATION ,
294
388
server_app_id = AZURE_SERVER_APP_ID ,
295
389
server_app_secret = AZURE_SERVER_APP_SECRET ,
@@ -298,6 +392,45 @@ async def setup_clients():
298
392
require_access_control = AZURE_ENFORCE_ACCESS_CONTROL ,
299
393
)
300
394
395
+ if USE_USER_UPLOAD :
396
+ current_app .logger .info ("USE_USER_UPLOAD is true, setting up user upload feature" )
397
+ user_blob_container_client = FileSystemClient (
398
+ f"https://{ AZURE_USERSTORAGE_ACCOUNT } .dfs.core.windows.net" ,
399
+ AZURE_USERSTORAGE_CONTAINER ,
400
+ credential = azure_credential ,
401
+ )
402
+ current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ] = user_blob_container_client
403
+
404
+ # Set up ingester
405
+ file_processors = setup_file_processors (
406
+ azure_credential = azure_credential ,
407
+ document_intelligence_service = os .getenv ("AZURE_DOCUMENTINTELLIGENCE_SERVICE" ),
408
+ local_pdf_parser = os .getenv ("USE_LOCAL_PDF_PARSER" , "" ).lower () == "true" ,
409
+ local_html_parser = os .getenv ("USE_LOCAL_HTML_PARSER" , "" ).lower () == "true" ,
410
+ search_images = USE_GPT4V ,
411
+ )
412
+ search_info = await setup_search_info (
413
+ search_service = AZURE_SEARCH_SERVICE ,
414
+ index_name = AZURE_SEARCH_INDEX ,
415
+ azure_credential = azure_credential ,
416
+ search_key = clean_key_if_exists (search_key ),
417
+ )
418
+ text_embeddings_service = setup_embeddings_service (
419
+ azure_credential = azure_credential ,
420
+ openai_host = OPENAI_HOST ,
421
+ openai_model_name = OPENAI_EMB_MODEL ,
422
+ openai_service = AZURE_OPENAI_SERVICE ,
423
+ openai_deployment = AZURE_OPENAI_EMB_DEPLOYMENT ,
424
+ openai_dimensions = OPENAI_EMB_DIMENSIONS ,
425
+ openai_key = clean_key_if_exists (OPENAI_API_KEY ),
426
+ openai_org = OPENAI_ORGANIZATION ,
427
+ disable_vectors = os .getenv ("USE_VECTORS" , "" ).lower () == "false" ,
428
+ )
429
+ ingester = UploadUserFileStrategy (
430
+ search_info = search_info , embeddings = text_embeddings_service , file_processors = file_processors
431
+ )
432
+ current_app .config [CONFIG_INGESTER ] = ingester
433
+
301
434
# Used by the OpenAI SDK
302
435
openai_client : AsyncOpenAI
303
436
@@ -335,6 +468,7 @@ async def setup_clients():
335
468
current_app .config [CONFIG_GPT4V_DEPLOYED ] = bool (USE_GPT4V )
336
469
current_app .config [CONFIG_SEMANTIC_RANKER_DEPLOYED ] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
337
470
current_app .config [CONFIG_VECTOR_SEARCH_ENABLED ] = os .getenv ("USE_VECTORS" , "" ).lower () != "false"
471
+ current_app .config [CONFIG_USER_UPLOAD_ENABLED ] = bool (USE_USER_UPLOAD )
338
472
339
473
# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
340
474
# or some derivative, here we include several for exploration purposes
@@ -353,7 +487,24 @@ async def setup_clients():
353
487
query_speller = AZURE_SEARCH_QUERY_SPELLER ,
354
488
)
355
489
490
+ current_app .config [CONFIG_CHAT_APPROACH ] = ChatReadRetrieveReadApproach (
491
+ search_client = search_client ,
492
+ openai_client = openai_client ,
493
+ auth_helper = auth_helper ,
494
+ chatgpt_model = OPENAI_CHATGPT_MODEL ,
495
+ chatgpt_deployment = AZURE_OPENAI_CHATGPT_DEPLOYMENT ,
496
+ embedding_model = OPENAI_EMB_MODEL ,
497
+ embedding_deployment = AZURE_OPENAI_EMB_DEPLOYMENT ,
498
+ embedding_dimensions = OPENAI_EMB_DIMENSIONS ,
499
+ sourcepage_field = KB_FIELDS_SOURCEPAGE ,
500
+ content_field = KB_FIELDS_CONTENT ,
501
+ query_language = AZURE_SEARCH_QUERY_LANGUAGE ,
502
+ query_speller = AZURE_SEARCH_QUERY_SPELLER ,
503
+ )
504
+
356
505
if USE_GPT4V :
506
+ current_app .logger .info ("USE_GPT4V is true, setting up GPT4V approach" )
507
+
357
508
token_provider = get_bearer_token_provider (azure_credential , "https://cognitiveservices.azure.com/.default" )
358
509
359
510
current_app .config [CONFIG_ASK_VISION_APPROACH ] = RetrieveThenReadVisionApproach (
@@ -392,26 +543,13 @@ async def setup_clients():
392
543
query_speller = AZURE_SEARCH_QUERY_SPELLER ,
393
544
)
394
545
395
- current_app .config [CONFIG_CHAT_APPROACH ] = ChatReadRetrieveReadApproach (
396
- search_client = search_client ,
397
- openai_client = openai_client ,
398
- auth_helper = auth_helper ,
399
- chatgpt_model = OPENAI_CHATGPT_MODEL ,
400
- chatgpt_deployment = AZURE_OPENAI_CHATGPT_DEPLOYMENT ,
401
- embedding_model = OPENAI_EMB_MODEL ,
402
- embedding_deployment = AZURE_OPENAI_EMB_DEPLOYMENT ,
403
- embedding_dimensions = OPENAI_EMB_DIMENSIONS ,
404
- sourcepage_field = KB_FIELDS_SOURCEPAGE ,
405
- content_field = KB_FIELDS_CONTENT ,
406
- query_language = AZURE_SEARCH_QUERY_LANGUAGE ,
407
- query_speller = AZURE_SEARCH_QUERY_SPELLER ,
408
- )
409
-
410
546
411
547
@bp .after_app_serving
412
548
async def close_clients ():
413
549
await current_app .config [CONFIG_SEARCH_CLIENT ].close ()
414
550
await current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ].close ()
551
+ if current_app .config .get (CONFIG_USER_BLOB_CONTAINER_CLIENT ):
552
+ await current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ].close ()
415
553
416
554
417
555
def create_app ():
0 commit comments