Commit a87f861

Add examples for aws-sdk-transcribe-streaming (#27)
1 parent 278c7ca commit a87f861

File tree

clients/aws-sdk-transcribe-streaming/README.md
clients/aws-sdk-transcribe-streaming/examples/simple_file.py
clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
clients/aws-sdk-transcribe-streaming/examples/test.wav

4 files changed: +317 -0 lines changed

clients/aws-sdk-transcribe-streaming/README.md
Lines changed: 8 additions & 0 deletions

@@ -11,3 +11,11 @@ Pages can be built into portable HTML files for the time being. You can
 follow the instructions in the docs [README.md](https://github.com/awslabs/aws-sdk-python/blob/main/clients/aws-sdk-transcribe-streaming/docs/README.md).

 For high-level documentation, you can view the [`dev-guide`](https://github.com/awslabs/aws-sdk-python/tree/main/dev-guide) at the top level of this repo.
+
+### Examples
+
+The `examples` directory contains the following scripts to help you get started.
+You can run each one by calling `uv run <file_name>`. This will set up an
+environment for you with a supported Python version and required dependencies.
+- `simple_mic.py` - Stream audio from your microphone in real time and receive transcription results as you speak.
+- `simple_file.py` - Transcribe a pre-recorded audio file with simulated real-time streaming and rate limiting.
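
Both example scripts resolve AWS credentials from environment variables (via `EnvironmentCredentialsResolver`) and hard-code the `us-west-2` region, so a typical invocation, run from the `examples` directory, looks roughly like the sketch below. The variable names are the standard AWS credential environment variables; adjust them to however you normally supply credentials.

    export AWS_ACCESS_KEY_ID=<your-access-key-id>
    export AWS_SECRET_ACCESS_KEY=<your-secret-access-key>
    export AWS_SESSION_TOKEN=<your-session-token>  # only needed for temporary credentials
    uv run simple_mic.py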

clients/aws-sdk-transcribe-streaming/examples/simple_file.py
Lines changed: 155 additions & 0 deletions

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "aiofile~=3.9.0",
#     "aws-sdk-transcribe-streaming",
# ]
#
# [tool.uv.sources]
# aws-sdk-transcribe-streaming = { path = "../" }
# ///
"""
Audio file transcription example using AWS Transcribe Streaming.

This example demonstrates how to:
- Read audio from a pre-recorded file
- Stream audio to AWS Transcribe Streaming service with rate limiting
- Receive and display transcription results as they arrive

Prerequisites:
- AWS credentials configured (via environment variables)
- An audio file (default: test.wav in PCM format)
- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed

Usage:
- `uv run simple_file.py`
"""

import asyncio
import time
from pathlib import Path

import aiofile
from smithy_aws_core.identity import EnvironmentCredentialsResolver
from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver

from aws_sdk_transcribe_streaming.client import (
    StartStreamTranscriptionInput,
    TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
    AudioEvent,
    AudioStream,
    AudioStreamAudioEvent,
    TranscriptEvent,
    TranscriptResultStream,
)

AWS_REGION = "us-west-2"
ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"

SAMPLE_RATE = 16000
BYTES_PER_SAMPLE = 2
CHANNEL_NUMS = 1
AUDIO_PATH = Path(__file__).parent / "test.wav"
CHUNK_SIZE = 1024 * 8


async def apply_realtime_delay(
    audio_stream: EventPublisher[AudioStream],
    reader,
    bytes_per_sample: int,
    sample_rate: float,
    channel_nums: int,
) -> None:
    """Applies a delay when reading an audio file stream to simulate a real-time delay."""
    start_time = time.time()
    elapsed_audio_time = 0.0
    async for chunk in reader:
        await audio_stream.send(
            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
        )
        elapsed_audio_time += len(chunk) / (
            bytes_per_sample * sample_rate * channel_nums
        )
        # sleep to simulate real-time streaming
        wait_time = start_time + elapsed_audio_time - time.time()
        await asyncio.sleep(wait_time)


class TranscriptResultStreamHandler:
    def __init__(self, stream: EventReceiver[TranscriptResultStream]):
        self.stream = stream

    async def handle_events(self):
        # Continuously receives events from the stream and delegates
        # to appropriate handlers based on event type.
        async for event in self.stream:
            if isinstance(event.value, TranscriptEvent):
                await self.handle_transcript_event(event.value)

    async def handle_transcript_event(self, event: TranscriptEvent):
        # This handler can be implemented to handle transcriptions as needed.
        # Here's an example to get started.
        if not event.transcript or not event.transcript.results:
            return

        results = event.transcript.results
        for result in results:
            if result.alternatives:
                for alt in result.alternatives:
                    print(alt.transcript)


async def write_chunks(audio_stream: EventPublisher[AudioStream]):
    # NOTE: For pre-recorded files longer than 5 minutes, the sent audio
    # chunks should be rate limited to match the realtime bitrate of the
    # audio stream to avoid signing issues.
    async with aiofile.AIOFile(AUDIO_PATH, "rb") as afp:
        reader = aiofile.Reader(afp, chunk_size=CHUNK_SIZE)
        await apply_realtime_delay(
            audio_stream, reader, BYTES_PER_SAMPLE, SAMPLE_RATE, CHANNEL_NUMS
        )

    # Send an empty audio event to signal end of input
    await audio_stream.send(AudioStreamAudioEvent(value=AudioEvent(audio_chunk=b"")))
    # Small delay to ensure empty frame is sent before close
    await asyncio.sleep(0.4)
    await audio_stream.close()


async def main():
    # Initialize the Transcribe Streaming client
    client = TranscribeStreamingClient(
        config=Config(
            endpoint_uri=ENDPOINT_URI,
            region=AWS_REGION,
            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
        )
    )

    # Start a streaming transcription session
    stream = await client.start_stream_transcription(
        input=StartStreamTranscriptionInput(
            language_code="en-US",
            media_sample_rate_hertz=SAMPLE_RATE,
            media_encoding="pcm",
        )
    )

    # Get the output stream for receiving transcription results
    _, output_stream = await stream.await_output()

    # Set up the handler for processing transcription events
    handler = TranscriptResultStreamHandler(output_stream)

    print("Transcribing audio from file...")
    print("===============================")

    # Run audio streaming and transcription handling concurrently
    await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())


if __name__ == "__main__":
    asyncio.run(main())
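
In both examples, `TranscriptResultStreamHandler.handle_transcript_event` is deliberately minimal and prints every alternative of every result, including interim ones. The in-code comment invites customization; one possible refinement is sketched below, assuming the generated `Result` model exposes the service's `IsPartial` field as `result.is_partial` (an assumption about the generated models, not verified here).

    # Hypothetical variant of handle_transcript_event that prints only final results.
    # Assumes the generated Result model exposes IsPartial as `result.is_partial`.
    async def handle_transcript_event(self, event: TranscriptEvent):
        if not event.transcript or not event.transcript.results:
            return
        for result in event.transcript.results:
            if result.is_partial:
                continue  # skip interim hypotheses; wait for the finalized segment
            if result.alternatives:
                # Streaming results usually carry a single alternative; print the first.
                print(result.alternatives[0].transcript)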

clients/aws-sdk-transcribe-streaming/examples/simple_mic.py
Lines changed: 154 additions & 0 deletions

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "aws-sdk-transcribe-streaming",
#     "sounddevice~=0.5.3",
# ]
#
# [tool.uv.sources]
# aws-sdk-transcribe-streaming = { path = "../" }
# ///
"""
Real-time audio transcription example using AWS Transcribe Streaming.

This example demonstrates how to:
- Stream audio from your microphone in real-time
- Send audio to AWS Transcribe Streaming service
- Receive and display transcription results as they arrive

Prerequisites:
- AWS credentials configured (via environment variables)
- A working microphone
- [uv](https://docs.astral.sh/uv/getting-started/installation/) installed

Usage:
- `uv run simple_mic.py`
"""

import asyncio
import sys
from typing import Any, AsyncGenerator, Tuple

import sounddevice
from smithy_aws_core.identity import EnvironmentCredentialsResolver
from smithy_core.aio.interfaces.eventstream import EventPublisher, EventReceiver

from aws_sdk_transcribe_streaming.client import (
    StartStreamTranscriptionInput,
    TranscribeStreamingClient,
)
from aws_sdk_transcribe_streaming.config import Config
from aws_sdk_transcribe_streaming.models import (
    AudioEvent,
    AudioStream,
    AudioStreamAudioEvent,
    TranscriptEvent,
    TranscriptResultStream,
)

# Configuration
AWS_REGION = "us-west-2"
ENDPOINT_URI = f"https://transcribestreaming.{AWS_REGION}.amazonaws.com"
SAMPLE_RATE = 16000


async def mic_stream() -> AsyncGenerator[Tuple[bytes, Any], None]:
    # This function wraps the raw input stream from the microphone, forwarding
    # the blocks to an asyncio.Queue.
    loop = asyncio.get_event_loop()
    input_queue: asyncio.Queue = asyncio.Queue()

    def callback(indata, frame_count, time_info, status):
        loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))

    # Be sure to use the correct parameters for the audio stream that matches
    # the audio formats described for the source language you'll be using:
    # https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
    stream = sounddevice.RawInputStream(
        channels=1,
        samplerate=SAMPLE_RATE,
        callback=callback,
        blocksize=1024 * 2,
        dtype="int16",
    )

    # Initiate the audio stream and asynchronously yield the audio chunks
    # as they become available.
    with stream:
        while True:
            indata, status = await input_queue.get()
            yield indata, status


class TranscriptResultStreamHandler:
    def __init__(self, stream: EventReceiver[TranscriptResultStream]):
        self.stream = stream

    async def handle_events(self):
        # Continuously receives events from the stream and delegates
        # to appropriate handlers based on event type.
        async for event in self.stream:
            if isinstance(event.value, TranscriptEvent):
                await self.handle_transcript_event(event.value)

    async def handle_transcript_event(self, event: TranscriptEvent):
        # This handler can be implemented to handle transcriptions as needed.
        # Here's an example to get started.
        if not event.transcript or not event.transcript.results:
            return

        results = event.transcript.results
        for result in results:
            if result.alternatives:
                for alt in result.alternatives:
                    print(alt.transcript)


async def write_chunks(audio_stream: EventPublisher[AudioStream]):
    # This connects the raw audio chunks generator coming from the microphone
    # and passes them along to the transcription stream.
    async for chunk, _ in mic_stream():
        await audio_stream.send(
            AudioStreamAudioEvent(value=AudioEvent(audio_chunk=chunk))
        )


async def main():
    # Initialize the Transcribe Streaming client
    client = TranscribeStreamingClient(
        config=Config(
            endpoint_uri=ENDPOINT_URI,
            region=AWS_REGION,
            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
        )
    )

    # Start a streaming transcription session
    stream = await client.start_stream_transcription(
        input=StartStreamTranscriptionInput(
            language_code="en-US",
            media_sample_rate_hertz=SAMPLE_RATE,
            media_encoding="pcm",
        )
    )

    # Get the output stream for receiving transcription results
    _, output_stream = await stream.await_output()

    # Set up the handler for processing transcription events
    handler = TranscriptResultStreamHandler(output_stream)

    print("Start talking to see transcription!")
    print("(Press Ctrl+C to stop)")
    print("===================================")

    # Run audio streaming and transcription handling concurrently
    await asyncio.gather(write_chunks(stream.input_stream), handler.handle_events())


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nExiting.")
        sys.exit(0)
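
A note on the capture settings in `simple_mic.py`: the stream request declares `media_encoding="pcm"` and `media_sample_rate_hertz=16000`, and the `sounddevice.RawInputStream` is opened to match (`dtype="int16"`, one channel, 16 kHz). With `blocksize=1024 * 2`, each callback delivers 2048 frames of 2 bytes each, i.e. 4096 bytes, or 2048 / 16000 = 0.128 seconds of audio per `AudioEvent`.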

clients/aws-sdk-transcribe-streaming/examples/test.wav
223 KB
Binary file not shown.
