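wip: streamline Streamlit speech app (language selectbox, generic task prompt, English status messages)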

ks6088ts · ks6088ts · commit 1523e68778f1 · 2024-10-08T09:04:41.000+09:00
diff --git a/apps/14_streamlit_azure_ai_speech/README.md b/apps/14_streamlit_azure_ai_speech/README.md
@@ -1,7 +1,11 @@
 # Streamlit Azure AI Speech
 
 ```shell
+# Speech to Text script
 poetry run python apps/14_streamlit_azure_ai_speech/speech_to_text.py --help
+
+# Streamlit app
+poetry run python -m streamlit run apps/14_streamlit_azure_ai_speech/main.py
 ```
 
 # References
diff --git a/apps/14_streamlit_azure_ai_speech/main.py b/apps/14_streamlit_azure_ai_speech/main.py
@@ -49,11 +49,13 @@
         key="AZURE_AI_SPEECH_API_REGION",
         type="default",
     )
-    azure_ai_speech_api_language = st.text_input(
+    azure_ai_speech_api_language = st.selectbox(
         label="AZURE_AI_SPEECH_API_LANGUAGE",
-        value="en-US",
+        options=[
+            "en-US",
+            "ja-JP",
+        ],
         key="AZURE_AI_SPEECH_API_LANGUAGE",
-        type="default",
     )
     "[Azure Portal](https://portal.azure.com/)"
     "[Azure OpenAI Studio](https://oai.azure.com/resource/overview)"
@@ -71,11 +73,6 @@ def is_configured():
 
 st.info("This is a sample to transcribe text.")
 
-supported_tasks = [
-    "Create summaries",
-    "Do something",
-]
-
 # ---
 # 2 column layout
 
@@ -102,10 +99,14 @@ def is_configured():
 row2_left, row2_right = st.columns(2)
 
 with row2_left:
-    target = st.selectbox(
+    selected_task = st.selectbox(
         "Task",
-        supported_tasks,
-        key="target",
+        [
+            "Create summaries from the following text",
+            "Translate the following text into English",
+            # Add more tasks here
+        ],
+        key="selected_task",
         index=0,
     )
 
@@ -121,15 +122,7 @@ def start_recognition():
     process = subprocess.Popen(command, shell=True)
 
 
-def stop_recognition():
-    global process
-    if process:
-        pathlib.Path(".stop").touch()
-        process.wait()
-        process = None
-
-
-def run_task(target: str, input: str) -> str:
+def run_task(selected_task: str, input: str) -> str:
     client = AzureOpenAI(
         api_key=azure_openai_api_key,
         api_version=azure_openai_api_version,
@@ -142,7 +135,7 @@ def run_task(target: str, input: str) -> str:
             {
                 "role": "system",
                 "content": f"""
-                    You are a professional translator. Please transcribe the following text into {target}.
+                    Task: {selected_task}.
                     ---
                     {input}
                     ---
@@ -160,29 +153,22 @@ def load_transcribed_text():
 
 if start_transcribe_button:
     if not st.session_state.get("process"):
+        transcription_status.info("Transcribing...")
         start_recognition()
-        transcription_status.info("音声認識を開始しました。")
-        st.success("音声認識を開始しました。")
     else:
-        transcription_status.warning("音声認識は既に実行中です。")
-        st.warning("音声認識は既に実行中です。")
+        transcription_status.warning("Transcription is already running.")
 
 if stop_transcribe_button:
+    pathlib.Path(".stop").touch()
     output = load_transcribed_text()
     st.session_state.transcribed_result = output
     st.rerun()
 
-    if st.session_state.get("process"):
-        stop_recognition()
-        st.success("音声認識を停止しました。")
-    else:
-        st.warning("音声認識は実行されていません。")
-
 if run_task_button:
     transcribed_text = load_transcribed_text()
     with st.spinner("Running..."):
         output = run_task(
-            target=target,
+            selected_task=selected_task,
             input=transcribed_text,
         )
         st.write(output)
diff --git a/apps/14_streamlit_azure_ai_speech/speech_to_text.py b/apps/14_streamlit_azure_ai_speech/speech_to_text.py
@@ -63,8 +63,9 @@ def conversation_transcriber_transcribed_cb(evt: speechsdk.SpeechRecognitionEven
     if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
         logger.info(f"\tText={evt.result.text}")
         logger.info(f"\tSpeaker ID={evt.result.speaker_id}")
-        with open(outfilename, "a") as f:
-            f.write(f"{evt.result.text}\n")
+        if evt.result.text != "":
+            with open(outfilename, "a") as f:
+                f.write(f"{evt.result.text}\n")
     elif evt.result.reason == speechsdk.ResultReason.NoMatch:
         logger.info(f"\tNOMATCH: Speech could not be TRANSCRIBED: {evt.result.no_match_details}")
 
@@ -110,7 +111,7 @@ def stop_cb(evt: speechsdk.SessionEventArgs):
     # Waits for completion.
     while not transcribing_stop:
         if os.path.exists(".stop"):
-            logger.info("終了フラグが検出されました。音声認識を終了します。")
+            logger.info("Stopping transcription...")
             conversation_transcriber.stop_transcribing_async()
             os.remove(".stop")
             break