w

ks6088ts · ks6088ts · commit ffd09c218a60 · 2024-10-08T09:27:19.000+09:00
diff --git a/apps/14_streamlit_azure_ai_speech/README.md b/apps/14_streamlit_azure_ai_speech/README.md
@@ -12,4 +12,5 @@ poetry run python -m streamlit run apps/14_streamlit_azure_ai_speech/main.py
 
 - [How to recognize speech](https://learn.microsoft.com/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python)
 - [Quickstart: Create real-time diarization](https://learn.microsoft.com/azure/ai-services/speech-service/get-started-stt-diarization?tabs=windows&pivots=programming-language-python)
+- [Speech to text containers with Docker](https://learn.microsoft.com/azure/ai-services/speech-service/speech-container-stt?tabs=container&pivots=programming-language-python)
 - [AzureSpeechService でリアルタイム議事録](https://zenn.dev/o_ken_surprise/articles/991f5b592b91ee)
diff --git a/apps/14_streamlit_azure_ai_speech/main.py b/apps/14_streamlit_azure_ai_speech/main.py
@@ -22,6 +22,18 @@
         key="INEFERENCE_TYPE",
     )
     if inference_type == "local":
+        path_to_model = st.text_input(
+            label="PATH_TO_MODEL",
+            value="./model",
+            key="PATH_TO_MODEL",
+            type="default",
+        )
+        host = st.text_input(
+            label="HOST",
+            value="ws://localhost:5000",
+            key="HOST",
+            type="default",
+        )
         st.warning("yet to be implemented")
     if inference_type == "azure":
         azure_openai_endpoint = st.text_input(
diff --git a/apps/14_streamlit_azure_ai_speech/speech_to_text.py b/apps/14_streamlit_azure_ai_speech/speech_to_text.py
@@ -17,6 +17,18 @@ def init_args() -> argparse.Namespace:
         prog="speech_to_text",
         description="Azure AI Speech API Speech-to-Text",
     )
+    parser.add_argument(
+        "-t",
+        "--type",
+        default="azure",
+        help="Inference type, either 'local' or 'azure'",
+    )
+    parser.add_argument(
+        "-H",  # not "-h": argparse reserves -h for its built-in --help by default
+        "--host",
+        default="ws://localhost:5000",
+        help="Host address for local inference",
+    )
     parser.add_argument(
         "-s",
         "--subscription",
@@ -79,11 +91,20 @@ def start_transcription(args: argparse.Namespace):
     global outfilename
     outfilename = args.output
 
-    speech_config = speechsdk.SpeechConfig(
-        subscription=args.subscription,
-        region=args.region,
-        speech_recognition_language=args.language,
-    )
+    speech_config = None
+    if args.type == "local":
+        speech_config = speechsdk.SpeechConfig(
+            host=args.host,
+            speech_recognition_language=args.language,
+        )
+    if args.type == "azure":
+        speech_config = speechsdk.SpeechConfig(
+            subscription=args.subscription,
+            region=args.region,
+            speech_recognition_language=args.language,
+        )
+    if not speech_config:
+        raise ValueError(f"Invalid inference type: {args.type}")
 
     conversation_transcriber = speechsdk.transcription.ConversationTranscriber(
         speech_config=speech_config,