Skip to content

Commit af33b34

Browse files
committed
wip
1 parent c2adbf6 commit af33b34

File tree

5 files changed

+149
-6
lines changed

5 files changed

+149
-6
lines changed

.env.template

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ AZURE_BLOB_CONTAINER_NAME="audio"
3131
# Azure AI Speech
3232
AZURE_AI_SPEECH_API_ENDPOINT="https://<speech-api-name>.cognitiveservices.azure.com/"
3333
AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY="<speech-api-subscription-key>"
34+
AZURE_AI_SPEECH_API_REGION="eastus"
3435

3536
# Bing search resource
3637
BING_SUBSCRIPTION_KEY="<bing-subscription-key>"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# References
2+
3+
- [AzureSpeechService でリアルタイム議事録](https://zenn.dev/o_ken_surprise/articles/991f5b592b91ee)
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import argparse
2+
import logging
3+
import os
4+
import time
5+
6+
import azure.cognitiveservices.speech as speechsdk
7+
from dotenv import load_dotenv
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def init_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the speech-to-text script.

    The defaults for ``--subscription`` and ``--region`` are read from the
    ``AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY`` and ``AZURE_AI_SPEECH_API_REGION``
    environment variables at the moment this function is called, so any
    ``.env`` file has to be loaded into the environment beforehand.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which matches the previous zero-argument
            behavior.

    Returns:
        Namespace with the attributes ``subscription``, ``region``,
        ``language``, ``output`` and ``verbose``.
    """
    parser = argparse.ArgumentParser(
        prog="speech_to_text",
        description="Azure AI Speech API Speech-to-Text",
    )
    parser.add_argument(
        "-s",
        "--subscription",
        default=os.getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
        help="Azure AI Speech API subscription key",
    )
    parser.add_argument(
        "-r",
        "--region",
        default=os.getenv("AZURE_AI_SPEECH_API_REGION"),
        help="Azure AI Speech API region",
    )
    parser.add_argument(
        "-l",
        "--language",
        default="en-US",
        help="Language code for speech recognition",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="output.txt",
        help="Output file path",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Set verbose mode",
    )
    # Passing None keeps argparse's default of parsing sys.argv[1:].
    return parser.parse_args(argv)
48+
49+
50+
def conversation_transcriber_recognition_canceled_cb(evt: speechsdk.SessionEventArgs):
    """Announce on stdout that a canceled event arrived; ``evt`` is not inspected."""
    notice = "Canceled event"
    print(notice)
52+
53+
54+
def conversation_transcriber_session_stopped_cb(evt: speechsdk.SessionEventArgs):
    """Announce on stdout that the session stopped; ``evt`` is not inspected."""
    notice = "SessionStopped event"
    print(notice)
56+
57+
58+
def conversation_transcriber_transcribed_cb(evt: speechsdk.SpeechRecognitionEventArgs):
    """Print the outcome carried by a transcribed event.

    A header line is always printed. Recognized speech is followed by its
    text and speaker id; a no-match result is followed by its no-match
    details. Any other result reason prints nothing further.
    """
    print("TRANSCRIBED:")
    result = evt.result
    outcome = result.reason

    if outcome == speechsdk.ResultReason.NoMatch:
        print(f"\tNOMATCH: Speech could not be TRANSCRIBED: {result.no_match_details}")
        return
    if outcome != speechsdk.ResultReason.RecognizedSpeech:
        return

    print(f"\tText={result.text}")
    print(f"\tSpeaker ID={result.speaker_id}")
65+
66+
67+
def conversation_transcriber_session_started_cb(evt: speechsdk.SessionEventArgs):
    """Announce on stdout that the session started; ``evt`` is not inspected."""
    notice = "SessionStarted event"
    print(notice)
69+
70+
71+
def recognize_from_file(args: argparse.Namespace):
    """Run conversation transcription until the session stops or is canceled.

    Despite the name, no audio file is configured here: the transcriber is
    created without an ``audio_config``, so the SDK's default audio input is
    used (presumably the default microphone - confirm against the SDK docs).
    Results are printed by the module-level callbacks; ``args.output`` is not
    used by this function.

    Args:
        args: Parsed options. ``subscription``, ``region`` and ``language``
            are read.

    Raises:
        ValueError: If the subscription key or the region is missing.
    """
    # Function-scope import so this block does not depend on the file header.
    import threading

    # Fail early with an actionable message instead of an opaque SDK error.
    if not args.subscription or not args.region:
        raise ValueError(
            "Azure AI Speech subscription key and region are required "
            "(pass --subscription/--region or set "
            "AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY/AZURE_AI_SPEECH_API_REGION)"
        )

    speech_config = speechsdk.SpeechConfig(
        subscription=args.subscription,
        region=args.region,
        speech_recognition_language=args.language,
    )

    conversation_transcriber = speechsdk.transcription.ConversationTranscriber(
        speech_config=speech_config,
    )

    # Set from the SDK's callback thread, awaited on the calling thread.
    transcribing_stopped = threading.Event()

    def stop_cb(evt: speechsdk.SessionEventArgs):
        """Signal the waiting loop to finish upon receiving an event `evt`."""
        print(f"CLOSING on {evt}")
        transcribing_stopped.set()

    # Connect callbacks to the events fired by the conversation transcriber
    conversation_transcriber.transcribed.connect(conversation_transcriber_transcribed_cb)
    conversation_transcriber.session_started.connect(conversation_transcriber_session_started_cb)
    conversation_transcriber.session_stopped.connect(conversation_transcriber_session_stopped_cb)
    conversation_transcriber.canceled.connect(conversation_transcriber_recognition_canceled_cb)
    # stop transcribing on either session stopped or canceled events
    conversation_transcriber.session_stopped.connect(stop_cb)
    conversation_transcriber.canceled.connect(stop_cb)

    # .get() blocks until the asynchronous start has actually completed.
    conversation_transcriber.start_transcribing_async().get()

    try:
        # Poll with a timeout rather than blocking forever so that Ctrl-C is
        # still delivered promptly on every platform.
        while not transcribing_stopped.wait(timeout=0.5):
            pass
    finally:
        # Always shut the transcriber down, including on KeyboardInterrupt,
        # and wait for the stop to finish before returning.
        conversation_transcriber.stop_transcribing_async().get()
107+
108+
109+
if __name__ == "__main__":
    # Parse the .env file first: init_args() reads its --subscription and
    # --region defaults from the environment at call time, so loading the
    # file afterwards would leave those defaults empty.
    load_dotenv()

    args = init_args()

    # Set verbose mode
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    # Debug-level only, and with the subscription key masked, so the secret
    # is never written to the console or to log files.
    logger.debug(
        "arguments: %s",
        {**vars(args), "subscription": "***" if args.subscription else None},
    )

    try:
        recognize_from_file(args=args)
    except Exception as err:
        print(f"Encountered exception. {err}")

poetry.lock

Lines changed: 21 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ playwright = "^1.47.0"
3636
lxml = "^5.3.0"
3737
nest-asyncio = "^1.6.0"
3838
typer = "^0.12.5"
39+
azure-cognitiveservices-speech = "^1.40.0"
3940

4041
[tool.poetry.group.dev.dependencies]
4142
pre-commit = "^3.8.0"

0 commit comments

Comments
 (0)