Skip to content

Commit 5280f61

Browse files
authored
Merge pull request #82 from ks6088ts-labs/feature/issue-81_batch-transcription
add batch transcription
2 parents 0d86dc9 + fddd1b9 commit 5280f61

File tree

13 files changed

+466
-9
lines changed

13 files changed

+466
-9
lines changed

.env.template

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,12 @@ AZURE_AI_SEARCH_INDEX_NAME="chat"
2222
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT="https://<YOUR_AZURE_DOCUMENT_INTELLIGENCE_NAME>.cognitiveservices.azure.com/"
2323
AZURE_DOCUMENT_INTELLIGENCE_API_VERSION="2024-07-31-preview"
2424
AZURE_DOCUMENT_INTELLIGENCE_API_KEY="<YOUR_API_KEY>"
25+
26+
# Blob Storage
27+
AZURE_BLOB_ACCOUNT_URL="https://<storage-account-name>.blob.core.windows.net"
28+
AZURE_BLOB_SAS_TOKEN="<blob-sas-token>"
29+
AZURE_BLOB_CONTAINER_NAME="audio"
30+
31+
# Azure AI Speech
32+
AZURE_AI_SPEECH_API_ENDPOINT="https://<speech-api-name>.cognitiveservices.azure.com/"
33+
AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY="<speech-api-subscription-key>"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Here are the preferred tools for development.
3939
| [7_streamlit_chat_rag](./apps/7_streamlit_chat_rag/README.md) | Add RAG feature to Streamlit chat app | ![7_streamlit_chat_rag](./docs/images/7_streamlit_chat_rag.main.png) |
4040
| [8_streamlit_azure_openai_batch](./apps/8_streamlit_azure_openai_batch/README.md) | Call Azure OpenAI Batch API with Streamlit | ![8_streamlit_azure_openai_batch](./docs/images/8_streamlit_azure_openai_batch.main.png) |
4141
| [9_streamlit_azure_document_intelligence](./apps/9_streamlit_azure_document_intelligence/README.md) | Call Azure AI Document Intelligence API with Streamlit | ![9_streamlit_azure_document_intelligence](./docs/images/9_streamlit_azure_document_intelligence.main.png) |
42+
| [10_streamlit_batch_transcription](./apps/10_streamlit_batch_transcription/README.md) | Call Batch Transcription API with Streamlit | ![10_streamlit_batch_transcription](./docs/images/10_streamlit_batch_transcription.main.png) |
4243
| [99_streamlit_examples](./apps/99_streamlit_examples/README.md) | Code samples for Streamlit | ![99_streamlit_examples](./docs/images/99_streamlit_examples.explaindata.png) |
4344

4445
## How to run
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Call Batch Transcription API with Streamlit
2+
3+
This is a Streamlit app that calls the Azure AI Speech Batch Transcription API.
4+
5+
## Prerequisites
6+
7+
- Python 3.10 or later
8+
- Azure AI Speech Service subscription key
9+
10+
### Infrastructure setup
11+
12+
Follow the steps in [Assign a resource access role](https://learn.microsoft.com/azure/ai-services/speech-service/batch-transcription-audio-data?tabs=portal#assign-resource-access-role) to assign the Storage Blob Data Reader role to the managed identity of your Speech resource.
13+
14+
FIXME: automate this step
15+
16+
## Usage
17+
18+
1. Get an Azure AI Speech Service subscription key
19+
1. Copy [.env.template](../../.env.template) to `.env` in the same directory
20+
1. Set credentials in `.env`
21+
1. Run [main.py](./main.py)
22+
23+
```shell
24+
# Create a virtual environment
25+
$ python -m venv .venv
26+
27+
# Activate the virtual environment
28+
$ source .venv/bin/activate
29+
30+
# Install dependencies
31+
$ pip install -r requirements.txt
32+
33+
# Run the script
34+
$ python -m streamlit run apps/10_streamlit_batch_transcription/main.py
35+
```
36+
37+
### Example
38+
39+
![Streamlit](../../docs/images/10_streamlit_batch_transcription.main.png)
40+
41+
## References
42+
43+
- [What is batch transcription?](https://learn.microsoft.com/azure/ai-services/speech-service/batch-transcription)
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
from os import getenv
2+
from urllib.parse import urljoin
3+
4+
import requests
5+
import streamlit as st
6+
from azure.storage.blob import BlobServiceClient
7+
from dotenv import load_dotenv
8+
9+
load_dotenv()
10+
11+
# Reference links rendered in the sidebar, in display order.
_SIDEBAR_LINKS = (
    "[Microsoft Learn > What is batch transcription?](https://learn.microsoft.com/azure/ai-services/speech-service/batch-transcription)",
    "[Azure Portal](https://portal.azure.com/)",
    "[Azure OpenAI Studio](https://oai.azure.com/resource/overview)",
    "[View the source code](https://github.com/ks6088ts-labs/workshop-azure-openai/blob/main/apps/10_streamlit_batch_transcription/main.py)",
)

with st.sidebar:
    # Explicit st.write per link (same rendering as a bare string expression
    # handled by Streamlit "magic").
    for _link in _SIDEBAR_LINKS:
        st.write(_link)


st.title("10_streamlit_batch_transcription")
19+
20+
# ---------------
# Upload an audio file
# ---------------
# The user picks a WAV/MP3 file; pressing "Submit" stores it as a block blob
# named after the uploaded file, overwriting any existing blob of that name.
# Connection settings come from the AZURE_BLOB_* environment variables.
st.header("Upload audio file")
st.info("Upload an audio file to transcribe")
uploaded_file = st.file_uploader(
    "Upload an audio file",
    type=("wav", "mp3"),
)
if uploaded_file:
    audio_bytes = uploaded_file.read()
    if st.button("Submit", key="submit"):
        with st.spinner("Uploading..."):
            try:
                # Build the client chain step by step: service -> blob -> upload.
                service_client = BlobServiceClient(
                    account_url=getenv("AZURE_BLOB_ACCOUNT_URL"),
                    credential=getenv("AZURE_BLOB_SAS_TOKEN"),
                )
                blob_client = service_client.get_blob_client(
                    container=getenv("AZURE_BLOB_CONTAINER_NAME"),
                    blob=uploaded_file.name,
                )
                upload_result = blob_client.upload_blob(
                    data=audio_bytes,
                    blob_type="BlockBlob",
                    overwrite=True,
                )
                st.success("Uploaded successfully")
                st.write(upload_result)

            except Exception as err:
                # Show any SDK/configuration failure in the UI.
                st.error(err)
st.markdown("---")
61+
62+
# ---------------
# Create a batch transcription: https://learn.microsoft.com/azure/ai-services/speech-service/batch-transcription-create?pivots=rest-api
# ---------------
# Posts a transcription job for a single blob in the configured container and
# shows the service response plus the transcription ID (the last path segment
# of the "self" URL in the response).
st.header("Create a batch transcription")
st.info("Create a batch transcription")
blob_name = st.text_input(
    label="Blob name",
    key="blob_name",
    help="Enter the blob name to create a batch transcription",
)
if st.button(
    "Create a batch transcription",
    key="create",
    disabled=not blob_name,
):
    with st.spinner("Creating..."):
        try:
            response = requests.post(
                url=urljoin(
                    getenv("AZURE_AI_SPEECH_API_ENDPOINT"),
                    "speechtotext/v3.2/transcriptions",
                ),
                headers={
                    "Ocp-Apim-Subscription-Key": getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
                    "Content-Type": "application/json",
                },
                json={
                    # URL of the audio blob: <account url>/<container>/<blob name>.
                    "contentUrls": [
                        urljoin(
                            getenv("AZURE_BLOB_ACCOUNT_URL"),
                            f"{getenv('AZURE_BLOB_CONTAINER_NAME')}/{blob_name}",
                        ),
                    ],
                    "locale": "ja-JP",
                    "displayName": "My Transcription",
                    "model": None,
                    "properties": {
                        "wordLevelTimestampsEnabled": True,
                        "languageIdentification": {
                            "candidateLocales": ["ja-JP", "en-US"],
                        },
                    },
                },
                # requests never times out by default; bound the wait so an
                # unresponsive endpoint cannot hang the app indefinitely.
                timeout=30,
            )
            response.raise_for_status()
            # Parse the body once and reuse it for both outputs below.
            transcription = response.json()
            st.success("Created successfully")
            st.write(response)
            st.write(transcription)
            st.write(f"Transcription ID: {transcription['self'].split('/')[-1]}")
        except Exception as e:
            # Covers HTTP errors, timeouts, and malformed/unexpected responses.
            st.error(e)
st.markdown("---")
114+
115+
# ---------------
# Get transcription status
# ---------------
# Fetches the transcription resource for the entered ID and displays the JSON
# returned by the service.
st.header("Get transcription status")
st.info("Get transcription status")
transcription_id = st.text_input(
    label="Transcription ID",
    key="transcription_id",
    help="Enter the batch transcription ID",
)
if st.button(
    "Get transcription status",
    key="get_transcription_status",
    disabled=not transcription_id,
):
    with st.spinner("Retrieving..."):
        try:
            response = requests.get(
                url=urljoin(
                    getenv("AZURE_AI_SPEECH_API_ENDPOINT"),
                    f"speechtotext/v3.2/transcriptions/{transcription_id}",
                ),
                headers={
                    "Ocp-Apim-Subscription-Key": getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
                },
                # requests never times out by default; bound the wait so an
                # unresponsive endpoint cannot hang the app indefinitely.
                timeout=30,
            )
            response.raise_for_status()
            st.write(response.json())
        except Exception as e:
            # Covers HTTP errors, timeouts, and non-JSON responses.
            st.error(e)
st.markdown("---")
146+
147+
# ---------------
# Get transcription results
# ---------------
# Requests the "files" collection of the transcription with the entered ID and
# displays the JSON returned by the service.
st.header("Get transcription results")
st.info("Get transcription results")
transcription_id_result = st.text_input(
    label="transcription_id_result",
    key="transcription_id_result",
    help="Enter the batch transcription ID",
)
if st.button(
    "Get transcription results",
    key="get_transcription_results",
    disabled=not transcription_id_result,
):
    with st.spinner("Retrieving..."):
        try:
            response = requests.get(
                url=urljoin(
                    getenv("AZURE_AI_SPEECH_API_ENDPOINT"),
                    f"speechtotext/v3.2/transcriptions/{transcription_id_result}/files",
                ),
                headers={
                    "Ocp-Apim-Subscription-Key": getenv("AZURE_AI_SPEECH_API_SUBSCRIPTION_KEY"),
                },
                # requests never times out by default; bound the wait so an
                # unresponsive endpoint cannot hang the app indefinitely.
                timeout=30,
            )
            response.raise_for_status()
            st.write(response.json())
        except Exception as e:
            # Covers HTTP errors, timeouts, and non-JSON responses.
            st.error(e)

apps/8_streamlit_azure_openai_batch/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def get_client():
115115
batch_file_id = st.text_input(
116116
label="File ID",
117117
key="batch_file_id",
118-
help="Enter the file ID to track the file upload status",
118+
help="Enter the file ID to create a batch job",
119119
)
120120
if st.button(
121121
"Create batch job",
123 KB
Loading

0 commit comments

Comments
 (0)