Skip to content

Commit 1523e68

Browse files
committed
wip
1 parent ed9863a commit 1523e68

File tree

3 files changed

+26
-35
lines changed

3 files changed

+26
-35
lines changed

apps/14_streamlit_azure_ai_speech/README.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
# Streamlit Azure AI Speech
22

33
```shell
4+
# Speech to Text script
45
poetry run python apps/14_streamlit_azure_ai_speech/speech_to_text.py --help
6+
7+
# Streamlit app
8+
poetry run python -m streamlit run apps/14_streamlit_azure_ai_speech/main.py
59
```
610

711
# References

apps/14_streamlit_azure_ai_speech/main.py

Lines changed: 18 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -49,11 +49,13 @@
4949
key="AZURE_AI_SPEECH_API_REGION",
5050
type="default",
5151
)
52-
azure_ai_speech_api_language = st.text_input(
52+
azure_ai_speech_api_language = st.selectbox(
5353
label="AZURE_AI_SPEECH_API_LANGUAGE",
54-
value="en-US",
54+
options=[
55+
"en-US",
56+
"ja-JP",
57+
],
5558
key="AZURE_AI_SPEECH_API_LANGUAGE",
56-
type="default",
5759
)
5860
"[Azure Portal](https://portal.azure.com/)"
5961
"[Azure OpenAI Studio](https://oai.azure.com/resource/overview)"
@@ -71,11 +73,6 @@ def is_configured():
7173

7274
st.info("This is a sample to transcribe text.")
7375

74-
supported_tasks = [
75-
"Create summaries",
76-
"Do something",
77-
]
78-
7976
# ---
8077
# 2 column layout
8178

@@ -102,10 +99,14 @@ def is_configured():
10299
row2_left, row2_right = st.columns(2)
103100

104101
with row2_left:
105-
target = st.selectbox(
102+
selected_task = st.selectbox(
106103
"Task",
107-
supported_tasks,
108-
key="target",
104+
[
105+
"Create summaries from the following text",
106+
"Translate the following text into English",
107+
# Add more tasks here
108+
],
109+
key="selected_task",
109110
index=0,
110111
)
111112

@@ -121,15 +122,7 @@ def start_recognition():
121122
process = subprocess.Popen(command, shell=True)
122123

123124

124-
def stop_recognition():
125-
global process
126-
if process:
127-
pathlib.Path(".stop").touch()
128-
process.wait()
129-
process = None
130-
131-
132-
def run_task(target: str, input: str) -> str:
125+
def run_task(selected_task: str, input: str) -> str:
133126
client = AzureOpenAI(
134127
api_key=azure_openai_api_key,
135128
api_version=azure_openai_api_version,
@@ -142,7 +135,7 @@ def run_task(target: str, input: str) -> str:
142135
{
143136
"role": "system",
144137
"content": f"""
145-
You are a professional translator. Please transcribe the following text into {target}.
138+
Task: {selected_task}.
146139
---
147140
{input}
148141
---
@@ -160,29 +153,22 @@ def load_transcribed_text():
160153

161154
if start_transcribe_button:
162155
if not st.session_state.get("process"):
156+
transcription_status.info("Transcribing...")
163157
start_recognition()
164-
transcription_status.info("音声認識を開始しました。")
165-
st.success("音声認識を開始しました。")
166158
else:
167-
transcription_status.warning("音声認識は既に実行中です。")
168-
st.warning("音声認識は既に実行中です。")
159+
transcription_status.warning("Transcription is already running.")
169160

170161
if stop_transcribe_button:
162+
pathlib.Path(".stop").touch()
171163
output = load_transcribed_text()
172164
st.session_state.transcribed_result = output
173165
st.rerun()
174166

175-
if st.session_state.get("process"):
176-
stop_recognition()
177-
st.success("音声認識を停止しました。")
178-
else:
179-
st.warning("音声認識は実行されていません。")
180-
181167
if run_task_button:
182168
transcribed_text = load_transcribed_text()
183169
with st.spinner("Running..."):
184170
output = run_task(
185-
target=target,
171+
selected_task=selected_task,
186172
input=transcribed_text,
187173
)
188174
st.write(output)

apps/14_streamlit_azure_ai_speech/speech_to_text.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,9 @@ def conversation_transcriber_transcribed_cb(evt: speechsdk.SpeechRecognitionEven
6363
if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
6464
logger.info(f"\tText={evt.result.text}")
6565
logger.info(f"\tSpeaker ID={evt.result.speaker_id}")
66-
with open(outfilename, "a") as f:
67-
f.write(f"{evt.result.text}\n")
66+
if evt.result.text != "":
67+
with open(outfilename, "a") as f:
68+
f.write(f"{evt.result.text}\n")
6869
elif evt.result.reason == speechsdk.ResultReason.NoMatch:
6970
logger.info(f"\tNOMATCH: Speech could not be TRANSCRIBED: {evt.result.no_match_details}")
7071

@@ -110,7 +111,7 @@ def stop_cb(evt: speechsdk.SessionEventArgs):
110111
# Waits for completion.
111112
while not transcribing_stop:
112113
if os.path.exists(".stop"):
113-
logger.info("終了フラグが検出されました。音声認識を終了します。")
114+
logger.info("Stopping transcription...")
114115
conversation_transcriber.stop_transcribing_async()
115116
os.remove(".stop")
116117
break

0 commit comments

Comments
 (0)