Azure-Samples
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎app/backend/app.py‎
Lines changed: 33 additions & 13 deletions b/‎app/backend/app.py‎
Lines changed: 33 additions & 13 deletions
diff --git a/‎app/backend/config.py‎
Lines changed: 2 additions & 0 deletions b/‎app/backend/config.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎app/frontend/src/api/models.ts‎
Lines changed: 2 additions & 0 deletions b/‎app/frontend/src/api/models.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎app/frontend/src/pages/chat/Chat.tsx‎
Lines changed: 24 additions & 11 deletions b/‎app/frontend/src/pages/chat/Chat.tsx‎
Lines changed: 24 additions & 11 deletions
diff --git a/‎app/frontend/src/pages/oneshot/OneShot.tsx‎
Lines changed: 25 additions & 11 deletions b/‎app/frontend/src/pages/oneshot/OneShot.tsx‎
Lines changed: 25 additions & 11 deletions
diff --git a/‎docs/deploy_lowcost.md‎
Lines changed: 79 additions & 0 deletions b/‎docs/deploy_lowcost.md‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎infra/core/search/search-services.bicep‎
Lines changed: 2 additions & 1 deletion b/‎infra/core/search/search-services.bicep‎
Lines changed: 2 additions & 1 deletion
@@ -95,7 +95,8 @@ However, you can try the [Azure pricing calculator](https://azure.com/e/8ffbe5b1
 - Azure Blob Storage: Standard tier with ZRS (Zone-redundant storage). Pricing per storage and read operations. [Pricing](https://azure.microsoft.com/pricing/details/storage/blobs/)
 - Azure Monitor: Pay-as-you-go tier. Costs based on data ingested. [Pricing](https://azure.microsoft.com/pricing/details/monitor/)
 
-To reduce costs, you can switch to free SKUs for Azure App Service and Azure AI Document Intelligence by changing the parameters file under the `infra` folder. There are some limits to consider; for example, the free Azure AI Document Intelligence resource only analyzes the first 2 pages of each document. You can also reduce costs associated with the Azure AI Document Intelligence by reducing the number of documents in the `data` folder, or by removing the postprovision hook in `azure.yaml` that runs the `prepdocs.py` script.
+To reduce costs, you can switch to free SKUs for various services, but those SKUs have limitations.
+See this guide on [deploying with minimal costs](docs/deploy_lowcost.md) for more details.
 
 ⚠️ To avoid unnecessary costs, remember to take down your app if it's no longer in use,
 either by deleting the resource group in the Portal or running `azd down`.
 
@@ -5,8 +5,10 @@
 import mimetypes
 import os
 from pathlib import Path
-from typing import Any, AsyncGenerator, Dict, cast
+from typing import Any, AsyncGenerator, Dict, Union, cast
 
+from azure.core.credentials import AzureKeyCredential
+from azure.core.credentials_async import AsyncTokenCredential
 from azure.core.exceptions import ResourceNotFoundError
 from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
 from azure.keyvault.secrets.aio import SecretClient
@@ -46,6 +48,8 @@
     CONFIG_GPT4V_DEPLOYED,
     CONFIG_OPENAI_CLIENT,
     CONFIG_SEARCH_CLIENT,
+    CONFIG_SEMANTIC_RANKER_DEPLOYED,
+    CONFIG_VECTOR_SEARCH_ENABLED,
 )
 from core.authentication import AuthenticationHelper
 from decorators import authenticated, authenticated_path
@@ -192,7 +196,13 @@ def auth_setup():
 
 @bp.route("/config", methods=["GET"])
 def config():
-    return jsonify({"showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED]})
+    return jsonify(
+        {
+            "showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED],
+            "showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
+            "showVectorOption": current_app.config[CONFIG_VECTOR_SEARCH_ENABLED],
+        }
+    )
 
 
 @bp.before_app_serving
@@ -202,6 +212,7 @@ async def setup_clients():
     AZURE_STORAGE_CONTAINER = os.environ["AZURE_STORAGE_CONTAINER"]
     AZURE_SEARCH_SERVICE = os.environ["AZURE_SEARCH_SERVICE"]
     AZURE_SEARCH_INDEX = os.environ["AZURE_SEARCH_INDEX"]
+    SEARCH_SECRET_NAME = os.getenv("SEARCH_SECRET_NAME")
     VISION_SECRET_NAME = os.getenv("VISION_SECRET_NAME")
     AZURE_KEY_VAULT_NAME = os.getenv("AZURE_KEY_VAULT_NAME")
     # Shared by all OpenAI deployments
@@ -232,6 +243,7 @@ async def setup_clients():
 
     AZURE_SEARCH_QUERY_LANGUAGE = os.getenv("AZURE_SEARCH_QUERY_LANGUAGE", "en-us")
     AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER", "lexicon")
+    AZURE_SEARCH_SEMANTIC_RANKER = os.getenv("AZURE_SEARCH_SEMANTIC_RANKER", "free").lower()
 
     USE_GPT4V = os.getenv("USE_GPT4V", "").lower() == "true"
 
@@ -241,16 +253,31 @@ async def setup_clients():
     # If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True)
     azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
 
+    # Fetch any necessary secrets from Key Vault
+    vision_key = None
+    search_key = None
+    if AZURE_KEY_VAULT_NAME and (VISION_SECRET_NAME or SEARCH_SECRET_NAME):
+        key_vault_client = SecretClient(
+            vault_url=f"https://{AZURE_KEY_VAULT_NAME}.vault.azure.net", credential=azure_credential
+        )
+        vision_key = (await key_vault_client.get_secret(VISION_SECRET_NAME)).value
+        search_key = (await key_vault_client.get_secret(SEARCH_SECRET_NAME)).value
+        await key_vault_client.close()
+
     # Set up clients for AI Search and Storage
+    search_credential: Union[AsyncTokenCredential, AzureKeyCredential] = (
+        AzureKeyCredential(search_key) if search_key else azure_credential
+    )
     search_client = SearchClient(
         endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
         index_name=AZURE_SEARCH_INDEX,
-        credential=azure_credential,
+        credential=search_credential,
     )
     search_index_client = SearchIndexClient(
         endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
-        credential=azure_credential,
+        credential=search_credential,
     )
+
     blob_client = BlobServiceClient(
         account_url=f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net", credential=azure_credential
     )
@@ -267,15 +294,6 @@ async def setup_clients():
         require_access_control=AZURE_ENFORCE_ACCESS_CONTROL,
     )
 
-    vision_key = None
-    if VISION_SECRET_NAME and AZURE_KEY_VAULT_NAME:  # Cognitive vision keys are stored in keyvault
-        key_vault_client = SecretClient(
-            vault_url=f"https://{AZURE_KEY_VAULT_NAME}.vault.azure.net", credential=azure_credential
-        )
-        vision_secret = await key_vault_client.get_secret(VISION_SECRET_NAME)
-        vision_key = vision_secret.value
-        await key_vault_client.close()
-
     # Used by the OpenAI SDK
     openai_client: AsyncOpenAI
 
@@ -301,6 +319,8 @@ async def setup_clients():
     current_app.config[CONFIG_AUTH_CLIENT] = auth_helper
 
     current_app.config[CONFIG_GPT4V_DEPLOYED] = bool(USE_GPT4V)
+    current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
+    current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
 
     # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
     # or some derivative, here we include several for exploration purposes
 
@@ -7,5 +7,7 @@
 CONFIG_BLOB_CONTAINER_CLIENT = "blob_container_client"
 CONFIG_AUTH_CLIENT = "auth_client"
 CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
+CONFIG_SEMANTIC_RANKER_DEPLOYED = "semantic_ranker_deployed"
+CONFIG_VECTOR_SEARCH_ENABLED = "vector_search_enabled"
 CONFIG_SEARCH_CLIENT = "search_client"
 CONFIG_OPENAI_CLIENT = "openai_client"
@@ -80,4 +80,6 @@ export type ChatAppRequest = {
 
 export type Config = {
     showGPT4VOptions: boolean;
+    showSemanticRankerOption: boolean;
+    showVectorOption: boolean;
 };
@@ -59,12 +59,20 @@ const Chat = () => {
     const [answers, setAnswers] = useState<[user: string, response: ChatAppResponse][]>([]);
     const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: ChatAppResponse][]>([]);
     const [showGPT4VOptions, setShowGPT4VOptions] = useState<boolean>(false);
+    const [showSemanticRankerOption, setShowSemanticRankerOption] = useState<boolean>(false);
+    const [showVectorOption, setShowVectorOption] = useState<boolean>(false);
 
     const getConfig = async () => {
         const token = client ? await getToken(client) : undefined;
 
         configApi(token).then(config => {
             setShowGPT4VOptions(config.showGPT4VOptions);
+            setUseSemanticRanker(config.showSemanticRankerOption);
+            setShowSemanticRankerOption(config.showSemanticRankerOption);
+            setShowVectorOption(config.showVectorOption);
+            if (!config.showVectorOption) {
+                setRetrievalMode(RetrievalMode.Text);
+            }
         });
     };
 
@@ -374,12 +382,15 @@ const Chat = () => {
                         onChange={onRetrieveCountChange}
                     />
                     <TextField className={styles.chatSettingsSeparator} label="Exclude category" onChange={onExcludeCategoryChanged} />
-                    <Checkbox
-                        className={styles.chatSettingsSeparator}
-                        checked={useSemanticRanker}
-                        label="Use semantic ranker for retrieval"
-                        onChange={onUseSemanticRankerChange}
-                    />
+
+                    {showSemanticRankerOption && (
+                        <Checkbox
+                            className={styles.chatSettingsSeparator}
+                            checked={useSemanticRanker}
+                            label="Use semantic ranker for retrieval"
+                            onChange={onUseSemanticRankerChange}
+                        />
+                    )}
                     <Checkbox
                         className={styles.chatSettingsSeparator}
                         checked={useSemanticCaptions}
@@ -405,11 +416,13 @@ const Chat = () => {
                         />
                     )}
 
-                    <VectorSettings
-                        showImageOptions={useGPT4V && showGPT4VOptions}
-                        updateVectorFields={(options: VectorFieldOptions[]) => setVectorFieldList(options)}
-                        updateRetrievalMode={(retrievalMode: RetrievalMode) => setRetrievalMode(retrievalMode)}
-                    />
+                    {showVectorOption && (
+                        <VectorSettings
+                            showImageOptions={useGPT4V && showGPT4VOptions}
+                            updateVectorFields={(options: VectorFieldOptions[]) => setVectorFieldList(options)}
+                            updateRetrievalMode={(retrievalMode: RetrievalMode) => setRetrievalMode(retrievalMode)}
+                        />
+                    )}
 
                     {useLogin && (
                         <Checkbox
 
@@ -33,6 +33,8 @@ export function Component(): JSX.Element {
     const [useOidSecurityFilter, setUseOidSecurityFilter] = useState<boolean>(false);
     const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState<boolean>(false);
     const [showGPT4VOptions, setShowGPT4VOptions] = useState<boolean>(false);
+    const [showSemanticRankerOption, setShowSemanticRankerOption] = useState<boolean>(false);
+    const [showVectorOption, setShowVectorOption] = useState<boolean>(false);
 
     const lastQuestionRef = useRef<string>("");
 
@@ -50,6 +52,12 @@ export function Component(): JSX.Element {
 
         configApi(token).then(config => {
             setShowGPT4VOptions(config.showGPT4VOptions);
+            setUseSemanticRanker(config.showSemanticRankerOption);
+            setShowSemanticRankerOption(config.showSemanticRankerOption);
+            setShowVectorOption(config.showVectorOption);
+            if (!config.showVectorOption) {
+                setRetrievalMode(RetrievalMode.Text);
+            }
         });
     };
 
@@ -237,12 +245,16 @@ export function Component(): JSX.Element {
                     onChange={onRetrieveCountChange}
                 />
                 <TextField className={styles.oneshotSettingsSeparator} label="Exclude category" onChange={onExcludeCategoryChanged} />
-                <Checkbox
-                    className={styles.oneshotSettingsSeparator}
-                    checked={useSemanticRanker}
-                    label="Use semantic ranker for retrieval"
-                    onChange={onUseSemanticRankerChange}
-                />
+
+                {showSemanticRankerOption && (
+                    <Checkbox
+                        className={styles.oneshotSettingsSeparator}
+                        checked={useSemanticRanker}
+                        label="Use semantic ranker for retrieval"
+                        onChange={onUseSemanticRankerChange}
+                    />
+                )}
+
                 <Checkbox
                     className={styles.oneshotSettingsSeparator}
                     checked={useSemanticCaptions}
@@ -262,11 +274,13 @@ export function Component(): JSX.Element {
                     />
                 )}
 
-                <VectorSettings
-                    showImageOptions={useGPT4V && showGPT4VOptions}
-                    updateVectorFields={(options: VectorFieldOptions[]) => setVectorFieldList(options)}
-                    updateRetrievalMode={(retrievalMode: RetrievalMode) => setRetrievalMode(retrievalMode)}
-                />
+                {showVectorOption && (
+                    <VectorSettings
+                        showImageOptions={useGPT4V && showGPT4VOptions}
+                        updateVectorFields={(options: VectorFieldOptions[]) => setVectorFieldList(options)}
+                        updateRetrievalMode={(retrievalMode: RetrievalMode) => setRetrievalMode(retrievalMode)}
+                    />
+                )}
 
                 {useLogin && (
                     <Checkbox
 
@@ -0,0 +1,79 @@
+# Deploying with minimal costs
+
+This AI RAG chat application is designed to be easily deployed using the Azure Developer CLI, which provisions the infrastructure according to the Bicep files in the `infra` folder. Those files describe each of the Azure resources needed and configure their SKU (pricing tier) and other parameters. Many Azure services offer a free tier, but the infrastructure files in this project do *not* default to the free tier as there are often limitations in that tier.
+
+However, if your goal is to minimize costs while prototyping your application, follow the steps below _before_ deploying the application.
+
+1. Use the free tier of App Service:
+
+    ```shell
+    azd env set AZURE_APP_SERVICE_SKU F1
+    ```
+
+    Limitation: You are only allowed a certain number of free App Service instances per region. If you have exceeded your limit in a region, you will get an error during the provisioning stage. If that happens, you can run `azd down`, then `azd env new` to create a new environment with a new region.
+
+2. Use the free tier of Azure AI Search:
+
+    ```shell
+    azd env set AZURE_SEARCH_SERVICE_SKU free
+    ```
+
+    Limitations:
+    1. You are only allowed one free search service across all regions.
+    If you have one already, either delete that service or follow instructions to
+    reuse your [existing search service](../README.md#existing-azure-ai-search-resource).
+    2. The free tier does not support semantic ranker, so the app UI will no longer display
+    the option to use the semantic ranker. Note that this will generally result in [decreased search relevance](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/azure-ai-search-outperforming-vector-search-with-hybrid/ba-p/3929167).
+    3. The free tier does not support Managed Identity (keyless API access),
+    so the Bicep will use Azure Key Vault to securely store the key instead.
+
+3. Use the free tier of Azure Document Intelligence (used in analyzing PDFs):
+
+    ```shell
+    azd env set AZURE_FORMRECOGNIZER_SKU F0
+    ```
+
+    Limitation: The free tier will only scan the first two pages of each PDF.
+    In our sample documents, those first two pages are just title pages,
+    so you won't be able to get answers from the documents.
+    You can either use your own documents that are at most two pages long,
+    or you can use a local Python package for PDF parsing by setting:
+
+    ```shell
+    azd env set USE_LOCAL_PDF_PARSER true
+    ```
+
+4. Turn off Azure Monitor (Application Insights):
+
+    ```shell
+    azd env set AZURE_USE_APPLICATION_INSIGHTS false
+    ```
+
+    Application Insights is quite inexpensive already, so the savings from turning it off may be small,
+    but it is an option for those who want to minimize costs.
+
+5. Disable vector search:
+
+    ```shell
+    azd env set USE_VECTORS false
+    ```
+
+    By default, the application computes vector embeddings for documents during the data ingestion phase,
+    and then computes a vector embedding for user questions asked in the application.
+    Those computations require an embedding model, which incurs costs per token used. The costs are fairly low,
+    so the benefits of vector search would typically outweigh the costs, but it is possible to disable vector support.
+    If you do so, the application will fall back to a keyword search, which is less accurate.
+
+6. Once you've made the desired customizations, follow the steps in [Deploying from scratch](../README.md#deploying-from-scratch) to run `azd up`.
+
+## Deploying from an Azure free account
+
+There are additional limitations for Azure free accounts (as opposed to "Pay-as-you-go" accounts which have billing enabled).
+
+As of January 2024, Azure free accounts cannot sign up for Azure OpenAI access.
+You can instead sign up for an openai.com account. Follow these [directions to specify your OpenAI host and key](../README.md#openaicom-openai).
+
+## Reducing costs locally
+
+To save costs for local development, you could use an OpenAI-compatible model.
+Follow the steps in the [local development guide](localdev.md#using-a-local-openai-compatible-api).
@@ -40,7 +40,8 @@ resource search 'Microsoft.Search/searchServices@2021-04-01-preview' = {
   name: name
   location: location
   tags: tags
-  identity: {
+  // The free tier does not support managed identity
+  identity: (sku.name == 'free') ? null : {
     type: 'SystemAssigned'
   }
   properties: {