Commit 8a58ddf

changes needed for user upload
1 parent 0d6e1ad commit 8a58ddf

12 files changed: +266 −112 lines


app/backend/app.py

Lines changed: 11 additions & 32 deletions
@@ -66,6 +66,7 @@
     CONFIG_CREDENTIAL,
     CONFIG_DEFAULT_REASONING_EFFORT,
     CONFIG_IMAGE_BLOB_CONTAINER_CLIENT,
+    CONFIG_IMAGE_DATALAKE_CLIENT,
     CONFIG_INGESTER,
     CONFIG_LANGUAGE_PICKER_ENABLED,
     CONFIG_MULTIMODAL_ENABLED,
@@ -354,7 +355,6 @@ async def speech():
 async def upload(auth_claims: dict[str, Any]):
     request_files = await request.files
     if "file" not in request_files:
-        # If no files were included in the request, return an error response
         return jsonify({"message": "No file part in the request", "status": "failed"}), 400
 
     user_oid = auth_claims["oid"]
@@ -372,10 +372,8 @@ async def delete_uploaded(auth_claims: dict[str, Any]):
     request_json = await request.get_json()
     filename = request_json.get("filename")
     user_oid = auth_claims["oid"]
-    user_blob_container_client: FileSystemClient = current_app.config[CONFIG_USER_BLOB_CONTAINER_CLIENT]
-    user_directory_client = user_blob_container_client.get_directory_client(user_oid)
-    file_client = user_directory_client.get_file_client(filename)
-    await file_client.delete_file()
+    adls_manager = AdlsBlobManager(current_app.config[CONFIG_USER_BLOB_CONTAINER_CLIENT])
+    await adls_manager.remove_blob(filename, user_oid)
     ingester = current_app.config[CONFIG_INGESTER]
     await ingester.remove_file(filename, user_oid)
     return jsonify({"message": f"File {filename} deleted successfully"}), 200
@@ -388,31 +386,8 @@ async def list_uploaded(auth_claims: dict[str, Any]):
     Only returns files directly in the user's directory, not in subdirectories.
     Excludes image files and the images directory."""
     user_oid = auth_claims["oid"]
-    user_blob_container_client: FileSystemClient = current_app.config[CONFIG_USER_BLOB_CONTAINER_CLIENT]
-    files = []
-    try:
-        all_paths = user_blob_container_client.get_paths(path=user_oid)
-        async for path in all_paths:
-            # Split path into parts (user_oid/filename or user_oid/directory/files)
-            path_parts = path.name.split("/", 1)
-            if len(path_parts) != 2:
-                continue
-
-            filename = path_parts[1]
-            # Only include files that are:
-            # 1. Directly in the user's directory (no additional slashes)
-            # 2. Not image files
-            # 3. Not in a directory containing 'images'
-            if (
-                "/" not in filename
-                and not any(filename.lower().endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".bmp"])
-                and "images" not in filename
-            ):
-                files.append(filename)
-    except ResourceNotFoundError as error:
-        if error.status_code != 404:
-            current_app.logger.exception("Error listing uploaded files", error)
-        # Return empty list for 404 (no directory) as this is expected for new users
+    adls_manager = AdlsBlobManager(current_app.config[CONFIG_USER_BLOB_CONTAINER_CLIENT])
+    files = await adls_manager.list_blobs(user_oid)
     return jsonify(files), 200
 
 
@@ -691,7 +666,8 @@ async def setup_clients():
         agent_client=agent_client,
         openai_client=openai_client,
         auth_helper=auth_helper,
-        images_blob_container_client=image_blob_container_client,
+        image_blob_container_client=image_blob_container_client,
+        image_datalake_client=user_blob_container_client,
         chatgpt_model=OPENAI_CHATGPT_MODEL,
         chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
         embedding_model=OPENAI_EMB_MODEL,
@@ -718,7 +694,8 @@ async def setup_clients():
         agent_client=agent_client,
         openai_client=openai_client,
         auth_helper=auth_helper,
-        images_blob_container_client=image_blob_container_client,
+        image_blob_container_client=image_blob_container_client,
+        image_datalake_client=user_blob_container_client,
        chatgpt_model=OPENAI_CHATGPT_MODEL,
         chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
         embedding_model=OPENAI_EMB_MODEL,
@@ -745,6 +722,8 @@ async def close_clients():
     await current_app.config[CONFIG_USER_BLOB_CONTAINER_CLIENT].close()
     if current_app.config.get(CONFIG_IMAGE_BLOB_CONTAINER_CLIENT):
         await current_app.config[CONFIG_IMAGE_BLOB_CONTAINER_CLIENT].close()
+    if current_app.config.get(CONFIG_IMAGE_DATALAKE_CLIENT):
+        await current_app.config[CONFIG_IMAGE_DATALAKE_CLIENT].close()
 
 
 def create_app():
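The delete_uploaded and list_uploaded handlers above now delegate to AdlsBlobManager, which this page does not show. A minimal sketch of the interface the diff assumes, reconstructed from the calls above and from the removed inline logic — an approximation, not the actual class:

from azure.core.exceptions import ResourceNotFoundError
from azure.storage.filedatalake.aio import FileSystemClient


class AdlsBlobManager:
    """Sketch of the helper the new handlers call into (assumed shape)."""

    def __init__(self, filesystem_client: FileSystemClient):
        self.filesystem_client = filesystem_client

    async def remove_blob(self, filename: str, user_oid: str) -> None:
        # Mirrors the removed inline logic: delete <user_oid>/<filename>
        directory_client = self.filesystem_client.get_directory_client(user_oid)
        await directory_client.get_file_client(filename).delete_file()

    async def list_blobs(self, user_oid: str) -> list[str]:
        # Mirrors the removed listing logic: only files directly under the
        # user's directory, skipping image files and subdirectories
        files: list[str] = []
        try:
            async for path in self.filesystem_client.get_paths(path=user_oid):
                path_parts = path.name.split("/", 1)
                if len(path_parts) != 2:
                    continue
                filename = path_parts[1]
                if "/" in filename or "images" in filename:
                    continue
                if filename.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
                    continue
                files.append(filename)
        except ResourceNotFoundError:
            pass  # no directory yet is expected for new users
        return files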

app/backend/approaches/approach.py

Lines changed: 28 additions & 7 deletions
@@ -24,6 +24,7 @@
     VectorQuery,
 )
 from azure.storage.blob.aio import ContainerClient
+from azure.storage.filedatalake.aio import FileSystemClient
 from openai import AsyncOpenAI, AsyncStream
 from openai.types import CompletionUsage
 from openai.types.chat import (
@@ -175,7 +176,8 @@ def __init__(
         multimodal_enabled: bool = False,
         vision_endpoint: Optional[str] = None,
         vision_token_provider: Optional[Callable[[], Awaitable[str]]] = None,
-        images_blob_container_client: Optional[ContainerClient] = None,
+        image_blob_container_client: Optional[ContainerClient] = None,
+        image_datalake_client: Optional[FileSystemClient] = None,
     ):
         self.search_client = search_client
         self.openai_client = openai_client
@@ -193,7 +195,23 @@ def __init__(
         self.multimodal_enabled = multimodal_enabled
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
-        self.images_blob_container_client = images_blob_container_client
+        self.image_blob_container_client = image_blob_container_client
+        self.image_datalake_client = image_datalake_client
+
+    def get_storage_client_for_url(self, url: str) -> Optional[Union[ContainerClient, FileSystemClient]]:
+        """
+        Determines which storage client to use for a given URL.
+
+        Args:
+            url: The URL or path of the image
+
+        Returns:
+            Either the ContainerClient for Blob Storage or FileSystemClient for Data Lake Storage,
+            based on the URL pattern. Returns None if no matching client is available.
+        """
+        if ".dfs.core.windows.net" in url and self.image_datalake_client:
+            return self.image_datalake_client
+        return self.image_blob_container_client
 
     def get_default_llm_inputs(self) -> str:
         """
@@ -363,7 +381,11 @@ async def run_agentic_retrieval(
         return response, results
 
     async def get_sources_content(
-        self, results: list[Document], use_semantic_captions: bool, use_image_sources: bool
+        self,
+        results: list[Document],
+        use_semantic_captions: bool,
+        use_image_sources: bool,
+        user_oid: Optional[str] = None,
     ) -> tuple[list[str], list[str], list[str]]:
         """
         Extracts text and image sources from the search results.
@@ -395,14 +417,13 @@ def nonewlines(s: str) -> str:
             text_sources.append(f"{citation}: {nonewlines(doc.content or '')}")
 
             if use_image_sources and hasattr(doc, "images") and doc.images:
-                if self.images_blob_container_client is None:
-                    raise ValueError("The images blob container client must be set to use image sources.")
                 for img in doc.images:
                     # Skip if we've already processed this URL
-                    if img["url"] in seen_urls:
+                    if img["url"] in seen_urls or not img["url"]:
                         continue
                     seen_urls.add(img["url"])
-                    url = await download_blob_as_base64(self.images_blob_container_client, img["url"])
+                    storage_client = self.get_storage_client_for_url(img["url"])
+                    url = await download_blob_as_base64(storage_client, img["url"], user_oid=user_oid)
                     if url:
                         image_sources.append(url)
                         citations.append(self.get_image_citation(doc.sourcepage or "", img["url"]))
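The new routing is a plain hostname check: user-uploaded images live in ADLS Gen2 and carry .dfs.core.windows.net URLs, while everything else falls back to the shared images container. A hypothetical illustration, given an Approach instance named approach with both clients set (account, container, and path names invented):

# ADLS URLs route to the per-user client so user_oid-scoped access applies;
# any other URL uses the global image blob container.
adls_url = "https://myaccount.dfs.core.windows.net/user-content/oid123/figure1_1.png"
blob_url = "https://myaccount.blob.core.windows.net/images/figure4_2.png"

assert approach.get_storage_client_for_url(adls_url) is approach.image_datalake_client
assert approach.get_storage_client_for_url(blob_url) is approach.image_blob_container_client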

app/backend/approaches/chatreadretrieveread.py

Lines changed: 12 additions & 7 deletions
@@ -7,6 +7,7 @@
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import VectorQuery
 from azure.storage.blob.aio import ContainerClient
+from azure.storage.filedatalake.aio import FileSystemClient
 from openai import AsyncOpenAI, AsyncStream
 from openai.types.chat import (
     ChatCompletion,
@@ -61,7 +62,8 @@ def __init__(
         multimodal_enabled: bool = False,
         vision_endpoint: Optional[str] = None,
         vision_token_provider: Optional[Callable[[], Awaitable[str]]] = None,
-        images_blob_container_client: Optional[ContainerClient] = None,
+        image_blob_container_client: Optional[ContainerClient] = None,
+        image_datalake_client: Optional[FileSystemClient] = None,
     ):
         self.search_client = search_client
         self.search_index_name = search_index_name
@@ -70,7 +72,8 @@ def __init__(
         self.agent_client = agent_client
         self.openai_client = openai_client
         self.auth_helper = auth_helper
-        self.images_blob_container_client = images_blob_container_client
+        self.image_blob_container_client = image_blob_container_client
+        self.image_datalake_client = image_datalake_client
         self.chatgpt_model = chatgpt_model
         self.chatgpt_deployment = chatgpt_deployment
         self.embedding_deployment = embedding_deployment
@@ -300,6 +303,7 @@ async def run_search_approach(
             VectorFieldType.TEXT_AND_IMAGE_EMBEDDINGS,
         ]
         use_image_sources = llm_inputs_enum in [LLMInputType.TEXT_AND_IMAGES, LLMInputType.IMAGES]
+        use_text_sources = llm_inputs_enum in [LLMInputType.TEXT_AND_IMAGES, LLMInputType.TEXTS]
 
         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
@@ -354,11 +358,11 @@ async def run_search_approach(
 
         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
         text_sources, image_sources, citations = await self.get_sources_content(
-            results, use_semantic_captions, use_image_sources=use_image_sources
+            results, use_semantic_captions, use_image_sources=use_image_sources, user_oid=auth_claims["oid"]
         )
 
         extra_info = ExtraInfo(
-            DataPoints(text=text_sources, images=image_sources, citations=citations),
+            DataPoints(text=text_sources if use_text_sources else [], images=image_sources, citations=citations),
             thoughts=[
                 self.format_thought_step_for_chatcompletion(
                     title="Prompt to generate search query",
@@ -417,19 +421,20 @@ async def run_agentic_retrieval_approach(
             results_merge_strategy=results_merge_strategy,
         )
 
-        # Determine if we should use image sources based on overrides or defaults
+        # Determine if we should use text/image sources based on overrides or defaults
        llm_inputs = overrides.get("llm_inputs")
         if llm_inputs is None:
             llm_inputs = self.get_default_llm_inputs()
         llm_inputs_enum = LLMInputType(llm_inputs) if llm_inputs is not None else None
         use_image_sources = llm_inputs_enum in [LLMInputType.TEXT_AND_IMAGES, LLMInputType.IMAGES]
+        use_text_sources = llm_inputs_enum in [LLMInputType.TEXT_AND_IMAGES, LLMInputType.TEXTS]
 
         text_sources, image_sources, citations = await self.get_sources_content(
-            results, use_semantic_captions=False, use_image_sources=use_image_sources
+            results, use_semantic_captions=False, use_image_sources=use_image_sources, user_oid=auth_claims["oid"]
         )
 
         extra_info = ExtraInfo(
-            DataPoints(text=text_sources, images=image_sources, citations=citations),
+            DataPoints(text=text_sources if use_text_sources else [], images=image_sources, citations=citations),
             thoughts=[
                 ThoughtStep(
                     "Use agentic retrieval",

app/backend/approaches/prompts/ask_answer_question.prompty

Lines changed: 3 additions & 2 deletions
@@ -20,8 +20,9 @@ Answer the following question using only the data provided in the sources below.
 Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response.
 If you cannot answer using the sources below, say you don't know. Use below example to answer.
 {% if image_sources %}
-Each image source has the original document file name in the top left corner of the image with coordinates (10,10) pixels and is in the format Document:<document_name.ext#page=N>.
-The filename of the actual image is in the top right corner of the image and is in the format Figure:<image_name.png>.
+Each image source has the document file name in the top left corner of the image with coordinates (10,10) pixels with format <filename.ext#page=N>,
+and the image figure name is right-aligned in the top right corner of the image.
+The filename of the actual image is in the top right corner of the image and is in the format <figureN_N.png>.
 Each text source starts in a new line and has the file name followed by colon and the actual information.
 Always include the source document filename for each fact you use in the response in the format: [document_name.ext#page=N].
 If you are referencing an image, add the image filename in the format: [document_name.ext#page=N(image_name.png)].
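This prompt (and the chat prompt below) describes the same annotation convention: the source document name is stamped at (10,10) and the figure filename is right-aligned in the top right corner. A hypothetical sketch of how an ingestion step might stamp those labels — the commit does not show the actual drawing code, so every name here is illustrative:

from PIL import Image, ImageDraw

def annotate_figure(img: Image.Image, doc_name: str, figure_name: str) -> Image.Image:
    """Stamp labels matching the convention the prompt describes (a guess)."""
    draw = ImageDraw.Draw(img)
    # Document name at (10, 10), e.g. "report.pdf#page=3"
    draw.text((10, 10), doc_name, fill="black")
    # Figure filename right-aligned in the top right corner, e.g. "figure3_1.png"
    width = draw.textlength(figure_name)
    draw.text((img.width - width - 10, 10), figure_name, fill="black")
    return img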

app/backend/approaches/prompts/chat_answer_question.prompty

Lines changed: 4 additions & 2 deletions
@@ -25,7 +25,9 @@ Answer ONLY with the facts listed in the list of sources below. If there isn't e
 If the question is not in English, answer in the language used in the question.
 Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
 {% if include_images %}
-Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:<file_name>
+Each image source has the document file name in the top left corner of the image with coordinates (10,10) pixels with format <filename.ext#page=N>,
+and the image figure name is right-aligned in the top right corner of the image.
+The filename of the actual image is in the top right corner of the image and is in the format <figureN_N.png>.
 Each text source starts in a new line and has the file name followed by colon and the actual information
 Always include the source name from the image or text for each fact you use in the response in the format: [filename]
 Answer the following question using only the data provided in the sources below.
@@ -62,4 +64,4 @@ Sources:
 {% for text_source in text_sources %}
 {{ text_source }}
 {% endfor %}
-{% endif %}
+{% endif %}

app/backend/approaches/retrievethenread.py

Lines changed: 7 additions & 4 deletions
@@ -5,6 +5,7 @@
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import VectorQuery
 from azure.storage.blob.aio import ContainerClient
+from azure.storage.filedatalake.aio import FileSystemClient
 from openai import AsyncOpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionMessageParam
 
@@ -52,7 +53,8 @@ def __init__(
         multimodal_enabled: bool = False,
         vision_endpoint: Optional[str] = None,
         vision_token_provider: Optional[Callable[[], Awaitable[str]]] = None,
-        images_blob_container_client: Optional[ContainerClient] = None,
+        image_blob_container_client: Optional[ContainerClient] = None,
+        image_datalake_client: Optional[FileSystemClient] = None,
     ):
         self.search_client = search_client
         self.search_index_name = search_index_name
@@ -62,7 +64,8 @@ def __init__(
         self.chatgpt_deployment = chatgpt_deployment
         self.openai_client = openai_client
         self.auth_helper = auth_helper
-        self.images_blob_container_client = images_blob_container_client
+        self.image_blob_container_client = image_blob_container_client
+        self.image_datalake_client = image_datalake_client
         self.chatgpt_model = chatgpt_model
         self.embedding_model = embedding_model
         self.embedding_dimensions = embedding_dimensions
@@ -200,7 +203,7 @@ async def run_search_approach(
         )
 
         text_sources, image_sources, citations = await self.get_sources_content(
-            results, use_semantic_captions, use_image_sources=use_image_sources
+            results, use_semantic_captions, use_image_sources=use_image_sources, user_oid=auth_claims["oid"]
         )
 
         return ExtraInfo(
@@ -261,7 +264,7 @@ async def run_agentic_retrieval_approach(
         use_image_sources = llm_inputs_enum in [LLMInputType.TEXT_AND_IMAGES, LLMInputType.IMAGES]
 
         text_sources, image_sources, citations = await self.get_sources_content(
-            results, use_semantic_captions=False, use_image_sources=use_image_sources
+            results, use_semantic_captions=False, use_image_sources=use_image_sources, user_oid=auth_claims["oid"]
         )
 
         extra_info = ExtraInfo(

app/backend/config.py

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 CONFIG_CHAT_APPROACH = "chat_approach"
 CONFIG_BLOB_CONTAINER_CLIENT = "blob_container_client"
 CONFIG_IMAGE_BLOB_CONTAINER_CLIENT = "image_blob_container_client"
+CONFIG_IMAGE_DATALAKE_CLIENT = "image_datalake_client"
 CONFIG_USER_UPLOAD_ENABLED = "user_upload_enabled"
 CONFIG_USER_BLOB_CONTAINER_CLIENT = "user_blob_container_client"
 CONFIG_AUTH_CLIENT = "auth_client"
