wip

ks6088ts · ks6088ts · commit f788684ecc9d · 2024-10-07T22:36:48.000+09:00
diff --git a/apps/99_streamlit_examples/pages/13_Azure_AI_Speech.py b/apps/99_streamlit_examples/pages/13_Azure_AI_Speech.py
@@ -1,4 +1,5 @@
 import time
+from enum import Enum
 from os import getenv
 
 import azure.cognitiveservices.speech as speechsdk
@@ -7,18 +8,43 @@
 
 load_dotenv()
 
+
+class ServiceType(Enum):
+    Azure = "Azure"
+    Local = "Local"
+
+
+class TranscriptionStatus(Enum):
+    NotStarted = "Not started"
+    InProgress = "In progress"
+    Completed = "Completed"
+
+
+if "transcription_status" not in st.session_state:
+    st.session_state.transcription_status = TranscriptionStatus.NotStarted
+
+
 with st.sidebar:
+    speech_recognition_language = st.selectbox(
+        label="Speech recognition language",
+        options=[
+            "en-US",
+            "ja-JP",
+            "zh-CN",
+        ],
+    )
     service_type = st.selectbox(
         label="Service type",
         options=[
-            "Local",
-            "Azure",
+            ServiceType.Local.value,
+            ServiceType.Azure.value,
         ],
     )
-    if service_type == "Azure":
-        azure_ai_services_api_key = st.text_input(
-            label="AZURE_AI_SERVICES_API_KEY",
-            key="AZURE_AI_SERVICES_API_KEY",
+    if service_type == ServiceType.Azure.value:
+        azure_ai_speech_api_subscription_key = st.text_input(
+            label="AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY",
+            value=getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
+            key="AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY",
             type="password",
         )
         azure_ai_speech_region = st.text_input(
@@ -27,7 +53,7 @@
             key="AZURE_AI_SPEECH_REGION",
             type="default",
         )
-    if service_type == "Local":
+    if service_type == ServiceType.Local.value:
         host = st.text_input(
             label="Host",
             value="ws://localhost:5000",
@@ -40,22 +66,24 @@
 
 
 def is_configured():
-    if service_type == "Azure":
-        return azure_ai_services_api_key and azure_ai_speech_region
-    if service_type == "Local":
-        return host != ""
+    if service_type == ServiceType.Azure.value:
+        return azure_ai_speech_api_subscription_key and azure_ai_speech_region and speech_recognition_language
+    if service_type == ServiceType.Local.value:
+        return host != "" and speech_recognition_language
     return False
 
 
 def get_speech_config():
-    if service_type == "Azure":
+    if service_type == ServiceType.Azure.value:
         return speechsdk.SpeechConfig(
-            subscription=azure_ai_services_api_key,
+            subscription=azure_ai_speech_api_subscription_key,
             region=azure_ai_speech_region,
+            speech_recognition_language=speech_recognition_language,
         )
-    if service_type == "Local":
+    if service_type == ServiceType.Local.value:
         return speechsdk.SpeechConfig(
             endpoint=host,
+            speech_recognition_language=speech_recognition_language,
         )
 
 
@@ -113,10 +141,12 @@ def conversation_transcriber_session_started_cb(evt: speechsdk.SessionEventArgs)
 
 st.title("Azure AI Speech Services")
 
+# Show transcription status; use .value because formatting a plain Enum member renders "TranscriptionStatus.<Name>"
+st.info(f"Transcription status: {st.session_state.transcription_status.value}")
+
 if not is_configured():
     st.warning("Please fill in the required fields at the sidebar.")
 
-st.info("Transcribe your speech.")
-
 if st.button("Transcribe", disabled=not is_configured()):
+    st.session_state.transcription_status = TranscriptionStatus.InProgress
     from_mic()