wip

ks6088ts · ks6088ts · commit af33b346de74 · 2024-10-08T07:06:57.000+09:00
diff --git a/.env.template b/.env.template
@@ -31,6 +31,7 @@ AZURE_BLOB_CONTAINER_NAME="audio"
 # Azure AI Speech
 AZURE_AI_SPEECH_API_ENDPOINT="https://<speech-api-name>.cognitiveservices.azure.com/"
 AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY="<speech-api-subscription-key>"
+AZURE_AI_SPEECH_API_REGION="eastus"
 
 # Bing search resource
 BING_SUBSCRIPTION_KEY="<bing-subscription-key>"
diff --git a/apps/14_streamlit_azure_ai_speech/README.md b/apps/14_streamlit_azure_ai_speech/README.md
@@ -0,0 +1,3 @@
+# References
+
+- [AzureSpeechService でリアルタイム議事録](https://zenn.dev/o_ken_surprise/articles/991f5b592b91ee)
diff --git a/apps/14_streamlit_azure_ai_speech/speech_to_text.py b/apps/14_streamlit_azure_ai_speech/speech_to_text.py
@@ -0,0 +1,123 @@
+import argparse
+import logging
+import os
+import time
+
+import azure.cognitiveservices.speech as speechsdk
+from dotenv import load_dotenv
+
+logger = logging.getLogger(__name__)
+
+
+def init_args() -> argparse.Namespace:
+    """Build the command-line parser and parse the process arguments.
+
+    The subscription key and region default to the values of the
+    ``AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY`` and ``AZURE_AI_SPEECH_API_REGION``
+    environment variables as they are at the time this function is called.
+
+    Returns:
+        Namespace with ``subscription``, ``region``, ``language``, ``output``
+        and ``verbose`` attributes.
+    """
+    # One (flags, keyword-arguments) entry per option, in declaration order.
+    option_specs = (
+        (
+            ("-s", "--subscription"),
+            {
+                "default": os.getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
+                "help": "Azure AI Speech API subscription key",
+            },
+        ),
+        (
+            ("-r", "--region"),
+            {
+                "default": os.getenv("AZURE_AI_SPEECH_API_REGION"),
+                "help": "Azure AI Speech API region",
+            },
+        ),
+        (
+            ("-l", "--language"),
+            {
+                "default": "en-US",
+                "help": "Language code for speech recognition",
+            },
+        ),
+        (
+            ("-o", "--output"),
+            {
+                "default": "output.txt",
+                "help": "Output file path",
+            },
+        ),
+        (
+            ("-v", "--verbose"),
+            {
+                "action": "store_true",
+                "help": "Set verbose mode",
+            },
+        ),
+    )
+
+    cli = argparse.ArgumentParser(
+        prog="speech_to_text",
+        description="Azure AI Speech API Speech-to-Text",
+    )
+    for flags, options in option_specs:
+        cli.add_argument(*flags, **options)
+    return cli.parse_args()
+
+
+def conversation_transcriber_recognition_canceled_cb(evt: speechsdk.SessionEventArgs):
+    """Print a marker line when the transcriber raises its canceled event.
+
+    Args:
+        evt: Event payload supplied by the SDK; it is not inspected here.
+    """
+    marker = "Canceled event"
+    print(marker)
+
+
+def conversation_transcriber_session_stopped_cb(evt: speechsdk.SessionEventArgs):
+    """Print a marker line when the transcriber raises its session-stopped event.
+
+    Args:
+        evt: Event payload supplied by the SDK; it is not inspected here.
+    """
+    marker = "SessionStopped event"
+    print(marker)
+
+
+def conversation_transcriber_transcribed_cb(evt: speechsdk.SpeechRecognitionEventArgs):
+    """Print the outcome carried by a transcribed event.
+
+    Recognized speech is printed as its text followed by the speaker ID; a
+    no-match result is printed with its no-match details. Any other result
+    reason produces only the ``TRANSCRIBED:`` header.
+
+    Args:
+        evt: Event payload supplied by the SDK; only ``evt.result`` is read.
+    """
+    print("TRANSCRIBED:")
+    outcome = evt.result
+    reason = outcome.reason
+
+    if reason == speechsdk.ResultReason.RecognizedSpeech:
+        print(f"\tText={outcome.text}")
+        print(f"\tSpeaker ID={outcome.speaker_id}")
+        return
+
+    if reason == speechsdk.ResultReason.NoMatch:
+        print(f"\tNOMATCH: Speech could not be TRANSCRIBED: {outcome.no_match_details}")
+
+
+def conversation_transcriber_session_started_cb(evt: speechsdk.SessionEventArgs):
+    """Print a marker line when the transcriber raises its session-started event.
+
+    Args:
+        evt: Event payload supplied by the SDK; it is not inspected here.
+    """
+    marker = "SessionStarted event"
+    print(marker)
+
+
+def recognize_from_file(args: argparse.Namespace):
+    """Run conversation transcription until the session stops or is canceled.
+
+    Results are printed to stdout by the module-level callbacks.
+
+    NOTE(review): despite the name, no audio configuration is passed to the
+    transcriber, so the SDK chooses its default audio input (presumably the
+    default microphone - confirm against the SDK documentation). ``args.output``
+    is not used by this function.
+
+    Args:
+        args: Parsed CLI arguments; ``subscription``, ``region`` and
+            ``language`` are read.
+
+    Raises:
+        ValueError: If the subscription key or the region is missing.
+    """
+    # Fail early with an actionable message instead of an opaque SDK error.
+    if not args.subscription or not args.region:
+        raise ValueError(
+            "Subscription key and region are required: pass --subscription/--region or set "
+            "AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY and AZURE_AI_SPEECH_API_REGION"
+        )
+
+    speech_config = speechsdk.SpeechConfig(
+        subscription=args.subscription,
+        region=args.region,
+        speech_recognition_language=args.language,
+    )
+
+    conversation_transcriber = speechsdk.transcription.ConversationTranscriber(
+        speech_config=speech_config,
+    )
+
+    transcribing_stop = False
+
+    def stop_cb(evt: speechsdk.SessionEventArgs):
+        """Signal the wait loop below to exit upon receiving an event `evt`."""
+        nonlocal transcribing_stop
+        print(f"CLOSING on {evt}")
+        transcribing_stop = True
+
+    # Connect callbacks to the events fired by the conversation transcriber
+    conversation_transcriber.transcribed.connect(conversation_transcriber_transcribed_cb)
+    conversation_transcriber.session_started.connect(conversation_transcriber_session_started_cb)
+    conversation_transcriber.session_stopped.connect(conversation_transcriber_session_stopped_cb)
+    conversation_transcriber.canceled.connect(conversation_transcriber_recognition_canceled_cb)
+    # stop transcribing on either session stopped or canceled events
+    conversation_transcriber.session_stopped.connect(stop_cb)
+    conversation_transcriber.canceled.connect(stop_cb)
+
+    # Wait for the start request itself to finish before polling the stop flag.
+    conversation_transcriber.start_transcribing_async().get()
+
+    try:
+        # The flag is flipped by stop_cb, which the SDK invokes from its own callbacks.
+        while not transcribing_stop:
+            time.sleep(0.5)
+    except KeyboardInterrupt:
+        print("Interrupted; stopping transcription")
+    finally:
+        # Always request a stop and wait for it, so the process does not exit
+        # while the stop request is still in flight.
+        conversation_transcriber.stop_transcribing_async().get()
+
+
+if __name__ == "__main__":
+    # Parse .env file and set environment variables.
+    # This must run before init_args(): the parser reads its defaults with
+    # os.getenv() at call time, so loading .env afterwards has no effect on them.
+    load_dotenv()
+
+    args = init_args()
+
+    # Set verbose mode
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+
+    # Log the parsed arguments for debugging only, and never the subscription key itself.
+    masked_args = {**vars(args), "subscription": "***" if args.subscription else None}
+    logger.debug("args=%s", masked_args)
+
+    try:
+        recognize_from_file(args=args)
+    except Exception as err:
+        print(f"Encountered exception. {err}")
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,6 +36,7 @@ playwright = "^1.47.0"
 lxml = "^5.3.0"
 nest-asyncio = "^1.6.0"
 typer = "^0.12.5"
+azure-cognitiveservices-speech = "^1.40.0"
 
 [tool.poetry.group.dev.dependencies]
 pre-commit = "^3.8.0"

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# References`
	`2`	`+`
	`3`	`+- [AzureSpeechService でリアルタイム議事録](https://zenn.dev/o_ken_surprise/articles/991f5b592b91ee)`