Commit 7ffcb3b

sowu880 and pamelafox authored
Add speech recognizer and synthesis on browser interface (#113)
* update * edit website * update * update * update * update * update * fix bug * fix bug * update * Update app.py * fix bug * fix bug * update * update * update * update * merge * update * update * update * Add documentation * Skip types for speech * Optionality * Make test more flexible * Update e2e * Rm test results * Improve speech check * Update speech tests * fix e2e * Split input/output * More precise env vars, tests * full test coverage * More consistency between Chat/Ask * Revert unneeded changes * Add link to AAD token docs * Add more parameters to be able to reuse existing resources * Revert unneeded change

---------

Co-authored-by: Pamela Fox <[email protected]>
Co-authored-by: Pamela Fox <[email protected]>
1 parent 69b6e8a · commit 7ffcb3b

33 files changed (+706 −34 lines)

.azdo/pipelines/azure-dev.yml

Lines changed: 6 additions & 0 deletions

@@ -86,6 +86,12 @@ steps:
          AZURE_COMPUTER_VISION_RESOURCE_GROUP: $(AZURE_COMPUTER_VISION_RESOURCE_GROUP)
          AZURE_COMPUTER_VISION_LOCATION: $(AZURE_COMPUTER_VISION_LOCATION)
          AZURE_COMPUTER_VISION_SKU: $(AZURE_COMPUTER_VISION_SKU)
+         USE_SPEECH_INPUT_BROWSER: $(USE_SPEECH_INPUT_BROWSER)
+         USE_SPEECH_OUTPUT_AZURE: $(USE_SPEECH_OUTPUT_AZURE)
+         AZURE_SPEECH_SERVICE: $(AZURE_SPEECH_SERVICE)
+         AZURE_SPEECH_SERVICE_RESOURCE_GROUP: $(AZURE_SPEECH_SERVICE_RESOURCE_GROUP)
+         AZURE_SPEECH_SERVICE_LOCATION: $(AZURE_SPEECH_SERVICE_LOCATION)
+         AZURE_SPEECH_SERVICE_SKU: $(AZURE_SPEECH_SERVICE_SKU)
          AZURE_KEY_VAULT_NAME: $(AZURE_KEY_VAULT_NAME)
          AZURE_USE_AUTHENTICATION: $(AZURE_USE_AUTHENTICATION)
          AZURE_ENFORCE_ACCESS_CONTROL: $(AZURE_ENFORCE_ACCESS_CONTROL)

.github/workflows/azure-dev.yml

Lines changed: 6 additions & 0 deletions

@@ -73,6 +73,12 @@ jobs:
          USE_GPT4V: ${{ vars.USE_GPT4V }}
          AZURE_VISION_ENDPOINT: ${{ vars.AZURE_VISION_ENDPOINT }}
          VISION_SECRET_NAME: ${{ vars.VISION_SECRET_NAME }}
+         USE_SPEECH_INPUT_BROWSER: ${{ vars.USE_SPEECH_INPUT_BROWSER }}
+         USE_SPEECH_OUTPUT_AZURE: ${{ vars.USE_SPEECH_OUTPUT_AZURE }}
+         AZURE_SPEECH_SERVICE: ${{ vars.AZURE_SPEECH_SERVICE }}
+         AZURE_SPEECH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SPEECH_RESOURCE_GROUP }}
+         AZURE_SPEECH_SERVICE_LOCATION: ${{ vars.AZURE_SPEECH_SERVICE_LOCATION }}
+         AZURE_SPEECH_SERVICE_SKU: ${{ vars.AZURE_SPEECH_SERVICE_SKU }}
          AZURE_KEY_VAULT_NAME: ${{ vars.AZURE_KEY_VAULT_NAME }}
          AZURE_USE_AUTHENTICATION: ${{ vars.AZURE_USE_AUTHENTICATION }}
          AZURE_ENFORCE_ACCESS_CONTROL: ${{ vars.AZURE_ENFORCE_ACCESS_CONTROL }}

.github/workflows/python-test.yaml

Lines changed: 2 additions & 2 deletions

@@ -63,11 +63,11 @@ jobs:
        id: e2e
        if: runner.os != 'Windows'
        run: |
-         playwright install --with-deps
+         playwright install chromium --with-deps
          python3 -m pytest tests/e2e.py --tracing=retain-on-failure
      - name: Upload test artifacts
        if: ${{ failure() && steps.e2e.conclusion == 'failure' }}
        uses: actions/upload-artifact@v4
        with:
-         name: playwright-traces
+         name: playwright-traces${{ matrix.python_version }}
          path: test-results

README.md

Lines changed: 8 additions & 6 deletions

@@ -27,7 +27,6 @@
 - [Troubleshooting](#troubleshooting)
 - [Resources](#resources)

-
 [![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=599293758&machine=standardLinux32gb&devcontainer_path=.devcontainer%2Fdevcontainer.json&location=WestUs2)
 [![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/azure-search-openai-demo)

@@ -39,11 +38,14 @@ The repo includes sample data so it's ready to try end to end. In this sample ap

 ## Features

-* Chat and Q&A interfaces
-* Explores various options to help users evaluate the trustworthiness of responses with citations, tracking of source content, etc.
-* Shows possible approaches for data preparation, prompt construction, and orchestration of interaction between model (OpenAI) and retriever (AI Search)
-* Settings directly in the UX to tweak the behavior and experiment with options
-* Performance tracing and monitoring with Application Insights
+- Chat (multi-turn) and Q&A (single turn) interfaces
+- Renders citations and thought process for each answer
+- Includes settings directly in the UI to tweak the behavior and experiment with options
+- Integrates Azure AI Search for indexing and retrieval of documents, with support for [many document formats](/docs/data_ingestion.md#supported-document-formats) as well as [integrated vectorization](/docs/data_ingestion.md#overview-of-integrated-vectorization)
+- Optional usage of [GPT-4 with vision](/docs/gpt4vision.md) to reason over image-heavy documents
+- Optional addition of [speech input/output](/docs/deploy_features.md#enabling-speech-inputoutput) for accessibility
+- Optional automation of [user login and data access](/docs/login_and_acl.md) via Microsoft Entra
+- Performance tracing and monitoring with Application Insights

 ![Chat screen](docs/images/chatscreen.png)

app/backend/app.py

Lines changed: 81 additions & 0 deletions

@@ -4,9 +4,17 @@
 import logging
 import mimetypes
 import os
+import time
 from pathlib import Path
 from typing import Any, AsyncGenerator, Dict, Union, cast

+from azure.cognitiveservices.speech import (
+    ResultReason,
+    SpeechConfig,
+    SpeechSynthesisOutputFormat,
+    SpeechSynthesisResult,
+    SpeechSynthesizer,
+)
 from azure.core.exceptions import ResourceNotFoundError
 from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
 from azure.monitor.opentelemetry import configure_azure_monitor

@@ -48,11 +56,18 @@
     CONFIG_BLOB_CONTAINER_CLIENT,
     CONFIG_CHAT_APPROACH,
     CONFIG_CHAT_VISION_APPROACH,
+    CONFIG_CREDENTIAL,
     CONFIG_GPT4V_DEPLOYED,
     CONFIG_INGESTER,
     CONFIG_OPENAI_CLIENT,
     CONFIG_SEARCH_CLIENT,
     CONFIG_SEMANTIC_RANKER_DEPLOYED,
+    CONFIG_SPEECH_INPUT_ENABLED,
+    CONFIG_SPEECH_OUTPUT_ENABLED,
+    CONFIG_SPEECH_SERVICE_ID,
+    CONFIG_SPEECH_SERVICE_LOCATION,
+    CONFIG_SPEECH_SERVICE_TOKEN,
+    CONFIG_SPEECH_SERVICE_VOICE,
     CONFIG_USER_BLOB_CONTAINER_CLIENT,
     CONFIG_USER_UPLOAD_ENABLED,
     CONFIG_VECTOR_SEARCH_ENABLED,

@@ -229,10 +244,56 @@ def config():
             "showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
             "showVectorOption": current_app.config[CONFIG_VECTOR_SEARCH_ENABLED],
             "showUserUpload": current_app.config[CONFIG_USER_UPLOAD_ENABLED],
+            "showSpeechInput": current_app.config[CONFIG_SPEECH_INPUT_ENABLED],
+            "showSpeechOutput": current_app.config[CONFIG_SPEECH_OUTPUT_ENABLED],
         }
     )


+@bp.route("/speech", methods=["POST"])
+async def speech():
+    if not request.is_json:
+        return jsonify({"error": "request must be json"}), 415
+
+    speech_token = current_app.config.get(CONFIG_SPEECH_SERVICE_TOKEN)
+    if speech_token is None or speech_token.expires_on < time.time() + 60:
+        speech_token = await current_app.config[CONFIG_CREDENTIAL].get_token(
+            "https://cognitiveservices.azure.com/.default"
+        )
+        current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = speech_token
+
+    request_json = await request.get_json()
+    text = request_json["text"]
+    try:
+        # Construct a token as described in documentation:
+        # https://learn.microsoft.com/azure/ai-services/speech-service/how-to-configure-azure-ad-auth?pivots=programming-language-python
+        auth_token = (
+            "aad#"
+            + current_app.config[CONFIG_SPEECH_SERVICE_ID]
+            + "#"
+            + current_app.config[CONFIG_SPEECH_SERVICE_TOKEN].token
+        )
+        speech_config = SpeechConfig(auth_token=auth_token, region=current_app.config[CONFIG_SPEECH_SERVICE_LOCATION])
+        speech_config.speech_synthesis_voice_name = current_app.config[CONFIG_SPEECH_SERVICE_VOICE]
+        speech_config.speech_synthesis_output_format = SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
+        synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
+        result: SpeechSynthesisResult = synthesizer.speak_text_async(text).get()
+        if result.reason == ResultReason.SynthesizingAudioCompleted:
+            return result.audio_data, 200, {"Content-Type": "audio/mp3"}
+        elif result.reason == ResultReason.Canceled:
+            cancellation_details = result.cancellation_details
+            current_app.logger.error(
+                "Speech synthesis canceled: %s %s", cancellation_details.reason, cancellation_details.error_details
+            )
+            raise Exception("Speech synthesis canceled. Check logs for details.")
+        else:
+            current_app.logger.error("Unexpected result reason: %s", result.reason)
+            raise Exception("Speech synthesis failed. Check logs for details.")
+    except Exception as e:
+        logging.exception("Exception in /speech")
+        return jsonify({"error": str(e)}), 500
+
+
 @bp.post("/upload")
 @authenticated
 async def upload(auth_claims: dict[str, Any]):

@@ -337,8 +398,14 @@ async def setup_clients():
     AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER", "lexicon")
     AZURE_SEARCH_SEMANTIC_RANKER = os.getenv("AZURE_SEARCH_SEMANTIC_RANKER", "free").lower()

+    AZURE_SPEECH_SERVICE_ID = os.getenv("AZURE_SPEECH_SERVICE_ID")
+    AZURE_SPEECH_SERVICE_LOCATION = os.getenv("AZURE_SPEECH_SERVICE_LOCATION")
+    AZURE_SPEECH_VOICE = os.getenv("AZURE_SPEECH_VOICE", "en-US-AndrewMultilingualNeural")
+
     USE_GPT4V = os.getenv("USE_GPT4V", "").lower() == "true"
     USE_USER_UPLOAD = os.getenv("USE_USER_UPLOAD", "").lower() == "true"
+    USE_SPEECH_INPUT_BROWSER = os.getenv("USE_SPEECH_INPUT_BROWSER", "").lower() == "true"
+    USE_SPEECH_OUTPUT_AZURE = os.getenv("USE_SPEECH_OUTPUT_AZURE", "").lower() == "true"

     # Use the current user identity to authenticate with Azure OpenAI, AI Search and Blob Storage (no secrets needed,
     # just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the

@@ -421,6 +488,18 @@ async def setup_clients():
     # Used by the OpenAI SDK
     openai_client: AsyncOpenAI

+    if USE_SPEECH_OUTPUT_AZURE:
+        if not AZURE_SPEECH_SERVICE_ID or AZURE_SPEECH_SERVICE_ID == "":
+            raise ValueError("Azure speech resource not configured correctly, missing AZURE_SPEECH_SERVICE_ID")
+        if not AZURE_SPEECH_SERVICE_LOCATION or AZURE_SPEECH_SERVICE_LOCATION == "":
+            raise ValueError("Azure speech resource not configured correctly, missing AZURE_SPEECH_SERVICE_LOCATION")
+        current_app.config[CONFIG_SPEECH_SERVICE_ID] = AZURE_SPEECH_SERVICE_ID
+        current_app.config[CONFIG_SPEECH_SERVICE_LOCATION] = AZURE_SPEECH_SERVICE_LOCATION
+        current_app.config[CONFIG_SPEECH_SERVICE_VOICE] = AZURE_SPEECH_VOICE
+        # Wait until token is needed to fetch for the first time
+        current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = None
+        current_app.config[CONFIG_CREDENTIAL] = azure_credential
+
     if OPENAI_HOST.startswith("azure"):
         token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")

@@ -456,6 +535,8 @@ async def setup_clients():
     current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
     current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
     current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
+    current_app.config[CONFIG_SPEECH_INPUT_ENABLED] = USE_SPEECH_INPUT_BROWSER
+    current_app.config[CONFIG_SPEECH_OUTPUT_ENABLED] = USE_SPEECH_OUTPUT_AZURE

     # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
     # or some derivative, here we include several for exploration purposes
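
For reference, the new `/speech` route accepts a JSON body with a `text` field and returns raw MP3 bytes on success (415 for non-JSON requests, 500 with a JSON error otherwise). Below is a minimal sketch, not code from this commit, of exercising the endpoint from a script; it uses `requests` (not a dependency of this repo), the local base URL/port is an assumption, and the call only succeeds when `USE_SPEECH_OUTPUT_AZURE` and the speech service settings above are configured.

```python
# Minimal sketch of calling the new /speech endpoint (not part of this commit).
# The host/port below is an assumption for a locally running backend.
import requests

BASE_URL = "http://localhost:50505"  # assumed local dev address of the Quart app

resp = requests.post(f"{BASE_URL}/speech", json={"text": "Hello, this answer is spoken aloud."})
resp.raise_for_status()

# On success the handler returns synthesized MP3 audio with Content-Type audio/mp3
assert resp.headers["Content-Type"].startswith("audio/mp3")
with open("answer.mp3", "wb") as audio_file:
    audio_file.write(resp.content)
```

The browser UI presumably plays these bytes directly rather than saving them, but the request/response contract is the same.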

app/backend/config.py

Lines changed: 6 additions & 0 deletions

@@ -14,3 +14,9 @@
 CONFIG_SEARCH_CLIENT = "search_client"
 CONFIG_OPENAI_CLIENT = "openai_client"
 CONFIG_INGESTER = "ingester"
+CONFIG_SPEECH_INPUT_ENABLED = "speech_input_enabled"
+CONFIG_SPEECH_OUTPUT_ENABLED = "speech_output_enabled"
+CONFIG_SPEECH_SERVICE_ID = "speech_service_id"
+CONFIG_SPEECH_SERVICE_LOCATION = "speech_service_location"
+CONFIG_SPEECH_SERVICE_TOKEN = "speech_service_token"
+CONFIG_SPEECH_SERVICE_VOICE = "speech_service_voice"
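
These constants are plain string keys used to stash per-app state on Quart's `app.config` at startup and read it back inside request handlers via `current_app`, as the `app.py` changes above do. A standalone sketch of that pattern (illustrative only, not code from this repo):

```python
# Illustrative sketch of the config-key pattern (not code from this commit).
from quart import Quart, current_app

CONFIG_SPEECH_SERVICE_VOICE = "speech_service_voice"

app = Quart(__name__)
# Set once at startup...
app.config[CONFIG_SPEECH_SERVICE_VOICE] = "en-US-AndrewMultilingualNeural"


@app.route("/voice")
async def voice():
    # ...and read back inside any request handler via current_app
    return {"voice": current_app.config[CONFIG_SPEECH_SERVICE_VOICE]}
```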

app/backend/requirements.in

Lines changed: 1 addition & 0 deletions

@@ -5,6 +5,7 @@ openai[datalib]>=1.3.7
 tiktoken
 tenacity
 azure-ai-documentintelligence
+azure-cognitiveservices-speech
 azure-search-documents==11.6.0b1
 azure-storage-blob
 azure-storage-file-datalake

app/backend/requirements.txt

Lines changed: 2 additions & 0 deletions

@@ -24,6 +24,8 @@ attrs==23.2.0
     # via aiohttp
 azure-ai-documentintelligence==1.0.0b3
     # via -r requirements.in
+azure-cognitiveservices-speech==1.37.0
+    # via -r requirements.in
 azure-common==1.1.28
     # via azure-search-documents
 azure-core==1.30.1

app/frontend/package-lock.json

Lines changed: 13 additions & 0 deletions
Generated file; diff not rendered.

app/frontend/package.json

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@
     "prettier": "^3.0.3",
     "typescript": "^5.2.2",
     "@types/react-syntax-highlighter": "^15.5.7",
+    "@types/dom-speech-recognition": "^0.0.4",
     "vite": "^4.5.3"
   }
 }
