add ai speech services

ks6088ts · ks6088ts · commit d591f905ef84 · 2024-10-07T21:51:45.000+09:00
diff --git a/.env.template b/.env.template
@@ -31,6 +31,7 @@ AZURE_BLOB_CONTAINER_NAME="audio"
 # Azure AI Speech
 AZURE_AI_SPEECH_API_ENDPOINT="https://<speech-api-name>.cognitiveservices.azure.com/"
 AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY="<speech-api-subscription-key>"
+AZURE_AI_SPEECH_REGION="southeastasia"
 
 # Bing search resource
 BING_SUBSCRIPTION_KEY="<bing-subscription-key>"
diff --git a/apps/99_streamlit_examples/pages/13_Azure_AI_Speech.py b/apps/99_streamlit_examples/pages/13_Azure_AI_Speech.py
@@ -0,0 +1,79 @@
+from os import getenv
+
+import azure.cognitiveservices.speech as speechsdk
+import streamlit as st
+from dotenv import load_dotenv
+
+load_dotenv()
+
+with st.sidebar:
+    service_type = st.selectbox(
+        label="Service type",
+        options=[
+            "Local",
+            "Azure",
+        ],
+    )
+    if service_type == "Azure":
+        azure_ai_services_api_key = st.text_input(
+            label="AZURE_AI_SERVICES_API_KEY",
+            key="AZURE_AI_SERVICES_API_KEY",
+            type="password",
+        )
+        azure_ai_speech_region = st.text_input(
+            label="AZURE_AI_SPEECH_REGION",
+            value=getenv("AZURE_AI_SPEECH_REGION"),
+            key="AZURE_AI_SPEECH_REGION",
+            type="default",
+        )
+    if service_type == "Local":
+        host = st.text_input(
+            label="Host",
+            value="ws://localhost:5000",
+            key="host",
+            type="default",
+        )
+    "[Azure Portal](https://portal.azure.com/)"
+    "[View the source code](https://github.com/ks6088ts-labs/workshop-azure-openai/blob/main/apps/99_streamlit_examples/pages/13_Azure_AI_Speech.py)"
+    "[Speech to text containers with Docker](https://learn.microsoft.com/azure/ai-services/speech-service/speech-container-stt?tabs=container&pivots=programming-language-python)"
+
+
+def is_configured():
+    if service_type == "Azure":
+        return azure_ai_services_api_key and azure_ai_speech_region
+    if service_type == "Local":
+        return host != ""
+    return False
+
+
+def get_speech_config():
+    if service_type == "Azure":
+        return speechsdk.SpeechConfig(
+            subscription=azure_ai_services_api_key,
+            region=azure_ai_speech_region,
+        )
+    if service_type == "Local":
+        return speechsdk.SpeechConfig(
+            endpoint=host,
+        )
+
+
+def from_mic():
+    speech_recognizer = speechsdk.SpeechRecognizer(
+        speech_config=get_speech_config(),
+    )
+
+    print("Speak into your microphone.")
+    speech_recognition_result = speech_recognizer.recognize_once_async().get()
+    print(speech_recognition_result.text)
+
+
+st.title("Azure AI Speech Services")
+
+if not is_configured():
+    st.warning("Please fill in the required fields at the sidebar.")
+
+st.info("Transcribe your speech.")
+
+if st.button("Transcribe", disabled=not is_configured()):
+    from_mic()
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,6 +36,7 @@ playwright = "^1.47.0"
 lxml = "^5.3.0"
 nest-asyncio = "^1.6.0"
 typer = "^0.12.5"
+azure-cognitiveservices-speech = "^1.40.0"
 
 [tool.poetry.group.dev.dependencies]
 pre-commit = "^3.8.0"