|
| 1 | +import pathlib |
| 2 | +import subprocess |
| 3 | +from os import getenv |
| 4 | + |
| 5 | +import streamlit as st |
| 6 | +from dotenv import load_dotenv |
| 7 | +from openai import AzureOpenAI |
| 8 | + |
| 9 | +load_dotenv() |
| 10 | + |
# Initialize the session state
if "transcribed_result" not in st.session_state:
    st.session_state["transcribed_result"] = ""


def _sidebar_text_input(label: str, value, secret: bool = False) -> str:
    """Render one sidebar text input whose widget key equals its label."""
    return st.text_input(
        label=label,
        value=value,
        key=label,
        type="password" if secret else "default",
    )


with st.sidebar:
    azure_openai_endpoint = _sidebar_text_input(
        "AZURE_OPENAI_ENDPOINT", getenv("AZURE_OPENAI_ENDPOINT")
    )
    azure_openai_api_key = _sidebar_text_input(
        "AZURE_OPENAI_API_KEY", getenv("AZURE_OPENAI_API_KEY"), secret=True
    )
    azure_openai_api_version = _sidebar_text_input(
        "AZURE_OPENAI_API_VERSION", getenv("AZURE_OPENAI_API_VERSION")
    )
    azure_openai_gpt_model = _sidebar_text_input(
        "AZURE_OPENAI_GPT_MODEL", getenv("AZURE_OPENAI_GPT_MODEL")
    )
    azure_ai_speech_api_subscription_key = _sidebar_text_input(
        "AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY",
        getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
        secret=True,
    )
    azure_ai_speech_api_region = _sidebar_text_input(
        "AZURE_AI_SPEECH_API_REGION", getenv("AZURE_AI_SPEECH_API_REGION")
    )
    azure_ai_speech_api_language = _sidebar_text_input(
        "AZURE_AI_SPEECH_API_LANGUAGE", "en-US"
    )
    # Handy links, rendered via Streamlit "magic" (bare expressions).
    "[Azure Portal](https://portal.azure.com/)"
    "[Azure OpenAI Studio](https://oai.azure.com/resource/overview)"
    "[View the source code](https://github.com/ks6088ts-labs/workshop-azure-openai/blob/main/apps/14_streamlit_azure_ai_speech/main.py)"
| 61 | + |
| 62 | + |
def is_configured() -> bool:
    """Return True when every required Azure OpenAI setting has a value.

    The original returned the final operand of an ``and`` chain — i.e. a
    raw settings string (or an empty value) rather than a bool — which
    leaked a configuration value to callers. ``all`` yields a plain bool
    with the same truthiness, so the ``if not is_configured()`` /
    ``disabled=not is_configured()`` call sites behave identically.
    """
    return all(
        (
            azure_openai_api_key,
            azure_openai_endpoint,
            azure_openai_api_version,
            azure_openai_gpt_model,
        )
    )
| 65 | + |
| 66 | + |
st.title("transcribe text")

if not is_configured():
    st.warning("Please fill in the required fields at the sidebar.")

st.info("This is a sample to transcribe text.")

# Tasks offered in the 2nd-row selectbox; passed verbatim into the prompt.
supported_tasks = [
    "Create summaries",
    "Do something",
]

# ---
# 2 column layout

# 1st row: transcription display (left) and recording controls (right)
row1_left, row1_right = st.columns(2)
with row1_left:
    # Renamed from `input`, which shadowed the builtin. The widget's return
    # value is not read anywhere else — run_task reads the transcript from
    # disk — so only this local binding changes; the widget key stays "input".
    transcribed_text_area = st.text_area(
        "Transcribed text",
        height=400,
        placeholder="Please enter the text to transcribe.",
        key="input",
        value=st.session_state["transcribed_result"],
    )

with row1_right:
    start_transcribe_button = st.button("start", disabled=not is_configured())
    stop_transcribe_button = st.button("stop", disabled=not is_configured())
    transcription_status = st.empty()  # placeholder for start/stop status lines

# line break horizontal line
st.markdown("---")

# 2nd row: task selector (left) and run button (right)
row2_left, row2_right = st.columns(2)

with row2_left:
    target = st.selectbox(
        "Task",
        supported_tasks,
        key="target",
        index=0,
    )

with row2_right:
    run_task_button = st.button("run_task", disabled=not is_configured())

# File the background speech_to_text.py process appends transcriptions to.
path_to_transcribed_text = ".transcribed.txt"
| 116 | + |
| 117 | + |
def start_recognition():
    """Launch the background speech-to-text subprocess.

    Fixes two defects in the original:
    * The command was an f-string executed with ``shell=True``, so the
      user-editable sidebar values (subscription key, region, language)
      were interpolated into a shell command — a shell-injection risk and
      broken for values containing spaces. An argument list avoids the
      shell entirely.
    * The ``Popen`` handle was kept only in a module global, which does
      not survive Streamlit script reruns; it is now mirrored into
      ``st.session_state["process"]`` so the button handlers can see it.
    """
    global process
    command = [
        "python",
        "apps/14_streamlit_azure_ai_speech/speech_to_text.py",
        "--output", path_to_transcribed_text,
        "--subscription", azure_ai_speech_api_subscription_key,
        "--region", azure_ai_speech_api_region,
        "--language", azure_ai_speech_api_language,
        "--verbose",
    ]
    process = subprocess.Popen(command)
    st.session_state["process"] = process
| 122 | + |
| 123 | + |
def stop_recognition():
    """Signal the transcription subprocess to stop and wait for it to exit.

    The original read ``global process`` directly, which raises NameError
    whenever start_recognition did not run during the current script rerun
    (module globals are reset on every Streamlit rerun). Prefer the handle
    stored in session_state and fall back to the global defensively.
    """
    global process
    proc = st.session_state.get("process") or globals().get("process")
    if proc:
        # Sentinel file — presumably polled by speech_to_text.py as its
        # shutdown signal; confirm against that script.
        pathlib.Path(".stop").touch()
        proc.wait()
        process = None
        st.session_state["process"] = None
| 130 | + |
| 131 | + |
def run_task(target: str, input: str) -> str:
    """Run the selected task over the transcribed text with Azure OpenAI.

    Args:
        target: The task chosen in the UI (e.g. "Create summaries").
        input: The transcribed text to operate on. (Parameter name kept
            for caller compatibility even though it shadows the builtin.)

    Returns:
        The model's response text.

    Fix: the original system prompt claimed "You are a professional
    translator ... transcribe the following text into {target}", but
    ``target`` is a task name, not a language — the prompt contradicted
    the UI. The prompt now instructs the model to perform the task.
    """
    client = AzureOpenAI(
        api_key=azure_openai_api_key,
        api_version=azure_openai_api_version,
        azure_endpoint=azure_openai_endpoint,
    )

    response = client.chat.completions.create(
        model=azure_openai_gpt_model,
        messages=[
            {
                "role": "system",
                "content": f"""
                You are a professional assistant. Please perform the following task on the text below: {target}.
                ---
                {input}
                ---
                """,
            },
        ],
    )
    return response.choices[0].message.content
| 154 | + |
| 155 | + |
def load_transcribed_text() -> str:
    """Return the accumulated transcription, or "" if none exists yet.

    The original raised FileNotFoundError when the stop/run buttons were
    pressed before the background recognizer wrote any output; an empty
    transcript is the sensible result in that case. Reads as UTF-8
    explicitly instead of relying on the platform default encoding.
    """
    path = pathlib.Path(path_to_transcribed_text)
    if not path.exists():
        return ""
    return path.read_text(encoding="utf-8")
| 159 | + |
| 160 | + |
if start_transcribe_button:
    if not st.session_state.get("process"):
        start_recognition()
        transcription_status.info("音声認識を開始しました。")
        st.success("音声認識を開始しました。")
    else:
        transcription_status.warning("音声認識は既に実行中です。")
        st.warning("音声認識は既に実行中です。")

if stop_transcribe_button:
    # Fix: the original called st.rerun() immediately after loading the
    # text, which aborts the script run and made the stop logic below it
    # unreachable — the subprocess was never stopped. Stop the recognizer
    # first, then persist the transcript, then rerun so the text_area
    # picks up the new session_state value.
    if st.session_state.get("process"):
        stop_recognition()
        st.success("音声認識を停止しました。")
    else:
        st.warning("音声認識は実行されていません。")
    st.session_state["transcribed_result"] = load_transcribed_text()
    st.rerun()

if run_task_button:
    transcribed_text = load_transcribed_text()
    with st.spinner("Running..."):
        output = run_task(
            target=target,
            input=transcribed_text,
        )
    st.write(output)
0 commit comments