Commit a87f861

Add examples for aws-sdk-transcribe-streaming (#27)
1 parent 278c7ca commit a87f861

File tree

clients/aws-sdk-transcribe-streaming/README.md
clients/aws-sdk-transcribe-streaming/examples/simple_file.py
clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
clients/aws-sdk-transcribe-streaming/examples/test.wav

4 files changed: +317 -0 lines changed

clients/aws-sdk-transcribe-streaming/README.md
Lines changed: 8 additions & 0 deletions

@@ -11,3 +11,11 @@ Pages can be built into portable HTML files for the time being. You can
 follow the instructions in the docs [README.md](https://github.com/awslabs/aws-sdk-python/blob/main/clients/aws-sdk-transcribe-streaming/docs/README.md).

 For high-level documentation, you can view the [`dev-guide`](https://github.com/awslabs/aws-sdk-python/tree/main/dev-guide) at the top level of this repo.
+
+### Examples
+
+The `examples` directory contains the following scripts to help you get started.
+You can run each one by calling `uv run <file_name>`. This will set up an
+environment for you with a supported Python version and required dependencies.
+- `simple_mic.py` - Stream audio from your microphone in real time and receive transcription results as you speak.
+- `simple_file.py` - Transcribe a pre-recorded audio file with simulated real-time streaming and rate limiting.
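
Both example scripts resolve AWS credentials from environment variables (via `EnvironmentCredentialsResolver`) and hard-code the `us-west-2` region, so a typical invocation, run from the `examples` directory, looks roughly like the sketch below. The variable names are the standard AWS credential environment variables; adjust them to however you normally supply credentials.

    export AWS_ACCESS_KEY_ID=<your-access-key-id>
    export AWS_SECRET_ACCESS_KEY=<your-secret-access-key>
    export AWS_SESSION_TOKEN=<your-session-token>  # only needed for temporary credentials
    uv run simple_mic.py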

clients/aws-sdk-transcribe-streaming/examples/simple_file.py
Lines changed: 155 additions & 0 deletions

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "aiofile~=3.9.0",
#     "aws-sdk-transcribe-streaming",
# ]
#
# [tool.uv.sources]
# aws-sdk-transcribe-streaming = { path = "../" }
# ///
"""
Audio file transcription example using AWS Transcribe Streaming.

This example demonstrates how to:
- Read audio from a pre-recorded file
- Stream audio to AWS Transcribe Streaming service with rate limiting
- Receive and display transcription results as they arrive

Prerequisites:
- AWS credentials configured (via environment variables)
- An audio file (default: test.wav in PCM format)
- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed

Usage:
- `uv run simple_file.py`
"""

import asyncio
import time
from pathlib import Path

import aiofile
from smithy_aws_core.identity import EnvironmentCredentialsResolver
from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver

from aws_sdk_transcribe_streaming.client import (
    StartStreamTranscriptionInput,
    TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
    AudioEvent,
    AudioStream,
    AudioStreamAudioEvent,
    TranscriptEvent,
    TranscriptResultStream,
)

AWS_REGION = "us-west-2"
ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"

SAMPLE_RATE = 16000
BYTES_PER_SAMPLE = 2
CHANNEL_NUMS = 1
AUDIO_PATH = Path(__file__).parent / "test.wav"
CHUNK_SIZE = 1024 * 8


async def apply_realtime_delay(
    audio_stream: EventPublisher[AudioStream],
    reader,
    bytes_per_sample: int,
    sample_rate: float,
    channel_nums: int,
) -> None:
    """Applies a delay when reading an audio file stream to simulate a real-time delay."""
    start_time = time.time()
    elapsed_audio_time = 0.0
    async for chunk in reader:
        await audio_stream.send(
            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
        )
        elapsed_audio_time += len(chunk) / (
            bytes_per_sample * sample_rate * channel_nums
        )
        # sleep to simulate real-time streaming
        wait_time = start_time + elapsed_audio_time - time.time()
        await asyncio.sleep(wait_time)


class TranscriptResultStreamHandler:
    def __init__(self, stream: EventReceiver[TranscriptResultStream]):
        self.stream = stream

    async def handle_events(self):
        # Continuously receives events from the stream and delegates
        # to appropriate handlers based on event type.
        async for event in self.stream:
            if isinstance(event.value, TranscriptEvent):
                await self.handle_transcript_event(event.value)

    async def handle_transcript_event(self, event: TranscriptEvent):
        # This handler can be implemented to handle transcriptions as needed.
        # Here's an example to get started.
        if not event.transcript or not event.transcript.results:
            return

        results = event.transcript.results
        for result in results:
            if result.alternatives:
                for alt in result.alternatives:
                    print(alt.transcript)


async def write_chunks(audio_stream: EventPublisher[AudioStream]):
    # NOTE: For pre-recorded files longer than 5 minutes, the sent audio
    # chunks should be rate limited to match the realtime bitrate of the
    # audio stream to avoid signing issues.
    async with aiofile.AIOFile(AUDIO_PATH, "rb") as afp:
        reader = aiofile.Reader(afp, chunk_size=CHUNK_SIZE)
        await apply_realtime_delay(
            audio_stream, reader, BYTES_PER_SAMPLE, SAMPLE_RATE, CHANNEL_NUMS
        )

    # Send an empty audio event to signal end of input
    await audio_stream.send(AudioStreamAudioEvent(value=AudioEvent(audio_chunk=b"")))
    # Small delay to ensure empty frame is sent before close
    await asyncio.sleep(0.4)
    await audio_stream.close()


async def main():
    # Initialize the Transcribe Streaming client
    client = TranscribeStreamingClient(
        config=Config(
            endpoint_uri=ENDPOINT_URI,
            region=AWS_REGION,
            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
        )
    )

    # Start a streaming transcription session
    stream = await client.start_stream_transcription(
        input=StartStreamTranscriptionInput(
            language_code="en-US",
            media_sample_rate_hertz=SAMPLE_RATE,
            media_encoding="pcm",
        )
    )

    # Get the output stream for receiving transcription results
    _, output_stream = await stream.await_output()

    # Set up the handler for processing transcription events
    handler = TranscriptResultStreamHandler(output_stream)

    print("Transcribing audio from file...")
    print("===============================")

    # Run audio streaming and transcription handling concurrently
    await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())


if __name__ == "__main__":
    asyncio.run(main())
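
In both examples, `TranscriptResultStreamHandler.handle_transcript_event` is deliberately minimal and prints every alternative of every result, including interim ones. The in-code comment invites customization; one possible refinement is sketched below, assuming the generated `Result` model exposes the service's `IsPartial` field as `result.is_partial` (an assumption about the generated models, not verified here).

    # Hypothetical variant of handle_transcript_event that prints only final results.
    # Assumes the generated Result model exposes IsPartial as `result.is_partial`.
    async def handle_transcript_event(self, event: TranscriptEvent):
        if not event.transcript or not event.transcript.results:
            return
        for result in event.transcript.results:
            if result.is_partial:
                continue  # skip interim hypotheses; wait for the finalized segment
            if result.alternatives:
                # Streaming results usually carry a single alternative; print the first.
                print(result.alternatives[0].transcript)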

clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
Lines changed: 154 additions & 0 deletions

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "aws-sdk-transcribe-streaming",
#     "sounddevice~=0.5.3",
# ]
#
# [tool.uv.sources]
# aws-sdk-transcribe-streaming = { path = "../" }
# ///
"""
Real-time audio transcription example using AWS Transcribe Streaming.

This example demonstrates how to:
- Stream audio from your microphone in real-time
- Send audio to AWS Transcribe Streaming service
- Receive and display transcription results as they arrive

Prerequisites:
- AWS credentials configured (via environment variables)
- A working microphone
- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed

Usage:
- `uv run simple_mic.py`
"""

import asyncio
import sys
from typing import Any, AsyncGenerator, Tuple

import sounddevice
from smithy_aws_core.identity import EnvironmentCredentialsResolver
from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver

from aws_sdk_transcribe_streaming.client import (
    StartStreamTranscriptionInput,
    TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
    AudioEvent,
    AudioStream,
    AudioStreamAudioEvent,
    TranscriptEvent,
    TranscriptResultStream,
)

# Configuration
AWS_REGION = "us-west-2"
ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"
SAMPLE_RATE = 16000


async def mic_stream() -> AsyncGenerator[Tuple[bytes, Any], None]:
    # This function wraps the raw input stream from the microphone, forwarding
    # the blocks to an asyncio.Queue.
    loop = asyncio.get_event_loop()
    input_queue: asyncio.Queue = asyncio.Queue()

    def callback(indata, frame_count, time_info, status):
        loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))

    # Be sure to use the correct parameters for the audio stream that matches
    # the audio formats described for the source language you'll be using:
    # https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
    stream = sounddevice.RawInputStream(
        channels=1,
        samplerate=SAMPLE_RATE,
        callback=callback,
        blocksize=1024 * 2,
        dtype="int16",
    )

    # Initiate the audio stream and asynchronously yield the audio chunks
    # as they become available.
    with stream:
        while True:
            indata, status = await input_queue.get()
            yield indata, status


class TranscriptResultStreamHandler:
    def __init__(self, stream: EventReceiver[TranscriptResultStream]):
        self.stream = stream

    async def handle_events(self):
        # Continuously receives events from the stream and delegates
        # to appropriate handlers based on event type.
        async for event in self.stream:
            if isinstance(event.value, TranscriptEvent):
                await self.handle_transcript_event(event.value)

    async def handle_transcript_event(self, event: TranscriptEvent):
        # This handler can be implemented to handle transcriptions as needed.
        # Here's an example to get started.
        if not event.transcript or not event.transcript.results:
            return

        results = event.transcript.results
        for result in results:
            if result.alternatives:
                for alt in result.alternatives:
                    print(alt.transcript)


async def write_chunks(audio_stream: EventPublisher[AudioStream]):
    # This connects the raw audio chunks generator coming from the microphone
    # and passes them along to the transcription stream.
    async for chunk, _ in mic_stream():
        await audio_stream.send(
            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
        )


async def main():
    # Initialize the Transcribe Streaming client
    client = TranscribeStreamingClient(
        config=Config(
            endpoint_uri=ENDPOINT_URI,
            region=AWS_REGION,
            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
        )
    )

    # Start a streaming transcription session
    stream = await client.start_stream_transcription(
        input=StartStreamTranscriptionInput(
            language_code="en-US",
            media_sample_rate_hertz=SAMPLE_RATE,
            media_encoding="pcm",
        )
    )

    # Get the output stream for receiving transcription results
    _, output_stream = await stream.await_output()

    # Set up the handler for processing transcription events
    handler = TranscriptResultStreamHandler(output_stream)

    print("Start talking to see transcription!")
    print("(Press Ctrl+C to stop)")
    print("===================================")

    # Run audio streaming and transcription handling concurrently
    await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nExiting.")
        sys.exit(0)
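
A note on the capture settings in `simple_mic.py`: the stream request declares `media_encoding="pcm"` and `media_sample_rate_hertz=16000`, and the `sounddevice.RawInputStream` is opened to match (`dtype="int16"`, one channel, 16 kHz). With `blocksize=1024 * 2`, each callback delivers 2048 frames of 2 bytes each, i.e. 4096 bytes, or 2048 / 16000 = 0.128 seconds of audio per `AudioEvent`.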

clients/aws-sdk-transcribe-streaming/examples/test.wav
223 KB
Binary file not shown.
