---
description: Use the Nitric framework to build a service for transcribing podcasts
tags:
  - API
  - AI & Machine Learning
languages:
  - python
---

# Transcribing Podcasts using OpenAI Whisper

## Prerequisites

- [uv](https://docs.astral.sh/uv/#getting-started) - for Python dependency management
- The [Nitric CLI](/get-started/installation)
- _(optional)_ An [AWS](https://aws.amazon.com) account

## Getting started

We'll start by creating a new project using Nitric's python starter template.

```bash
nitric new podcast-transcription py-starter
cd podcast-transcription
```

Next, let's install our base dependencies, then add the `openai-whisper` library as an optional `ml` dependency.

```bash
# Install the base dependencies
uv sync
# Add the whisper dependency to the 'ml' optional dependencies
uv add openai-whisper --optional ml
```

<Note>
  We add the extra dependencies to the 'ml' optional dependencies to keep them
  separate, since they can be quite large. This lets us install them only in the
  containers that need them.
</Note>

## Define our resources

We'll define the API, buckets and batch job that our services share in `src/resources.py`, so they can be imported by both the API service and the transcription job.

```python
from nitric.resources import job, bucket, api

# API for uploading podcasts and retrieving transcripts
main_api = api("main")

# Batch job that runs the Whisper transcription
transcribe_job = job("transcribe")

# Buckets for the raw podcast audio and the finished transcripts
podcast_bucket = bucket("podcasts")
transcript_bucket = bucket("transcripts")
```

## Add our API service

Next, create a service that uses these resources to accept podcast uploads and return finished transcripts (e.g. `services/api.py` if you're using the starter template's layout).

```python
import requests

from src.resources import main_api, transcript_bucket, podcast_bucket, transcribe_job
from nitric.application import Nitric
from nitric.context import BucketNotificationContext, HttpContext

# Request only the permissions each resource actually needs
writeable_podcast_bucket = podcast_bucket.allow("write")
readable_transcript_bucket = transcript_bucket.allow("read")
submittable_transcribe_job = transcribe_job.allow("submit")


@main_api.get("/podcast/:name")
async def get_podcast(ctx: HttpContext):
    # Return a previously generated transcript by name
    name = ctx.req.params['name']

    transcript = await readable_transcript_bucket.file(name).read()

    ctx.res.body = transcript

    return ctx


@main_api.post("/podcast/:name")
async def add_podcast(ctx: HttpContext):
    # Store the uploaded audio in the podcasts bucket via a signed upload URL
    name = ctx.req.params['name']

    upload_url = await writeable_podcast_bucket.file(name).upload_url()

    # Signed upload URLs expect an HTTP PUT with the file contents as the body
    resp = requests.put(upload_url, data=ctx.req.data)
    if 200 <= resp.status_code < 300:
        ctx.res.status = resp.status_code
        ctx.res.body = resp.text
    else:
        ctx.res.status = 500
        ctx.res.body = "Failed to store the podcast"

    return ctx


@podcast_bucket.on("write", "*")
async def on_add_podcast(ctx: BucketNotificationContext):
    # When a new podcast lands in the bucket, queue it for transcription
    await submittable_transcribe_job.submit({"podcast_name": ctx.req.key})

    return ctx


Nitric.run()
```

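With the API in place, here's a minimal sketch of how you might exercise it once the project is running, for example locally with `nitric start`. The base URL, file names and polling interval below are assumptions, not part of the guide; substitute the API address Nitric reports for your environment.

```python
import time

import requests

# Assumed local address for the "main" API; use the URL Nitric reports for your project.
BASE_URL = "http://localhost:4001"

# Upload an episode; the request body is forwarded to the podcasts bucket.
with open("my-episode.mp3", "rb") as f:
    resp = requests.post(f"{BASE_URL}/podcast/my-episode", data=f.read())
resp.raise_for_status()

# The bucket notification submits the transcription job, which can take a while.
# The transcript is written as "<podcast name>-transcript.txt", so poll for it.
while True:
    resp = requests.get(f"{BASE_URL}/podcast/my-episode-transcript.txt")
    if resp.ok:
        print(resp.text)
        break
    time.sleep(30)
```
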
## Add the transcribe batch job

Now add the job handler that performs the actual transcription in `batches/transcribe.py`. It downloads the podcast from the bucket, runs Whisper over it and writes the resulting transcript to the transcripts bucket.

```python
import whisper

from src.resources import transcribe_job, transcript_bucket, podcast_bucket
from nitric.context import JobContext
from nitric.application import Nitric

writeable_transcript_bucket = transcript_bucket.allow("write")
readable_podcast_bucket = podcast_bucket.allow("read")


# Resource requirements for the job; set gpus=1 if you have GPU quota available
@transcribe_job(cpus=1, memory=1024, gpus=0)
async def transcribe_podcast(ctx: JobContext):
    podcast_name = ctx.req.data["podcast_name"]
    print(f"Transcribing: {podcast_name}")

    # Download the audio to a local file so Whisper can read it
    podcast = await readable_podcast_bucket.file(podcast_name).read()

    with open("local-podcast", "wb") as f:
        f.write(podcast)

    # fp16=False avoids half-precision, which isn't supported on CPU
    model = whisper.load_model("turbo")
    result = model.transcribe("local-podcast", verbose=True, fp16=False)

    transcript = result["text"].encode()

    print("Finished transcribing... Writing to bucket")
    await writeable_transcript_bucket.file(f"{podcast_name}-transcript.txt").write(transcript)

    return ctx


Nitric.run()
```

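Whisper's result also includes per-segment timing under the `segments` key. If you'd like timestamped transcripts as well, a small helper like the sketch below could format them before writing to the bucket. This is an optional extra, not part of the guide's service.

```python
def format_segments(result: dict) -> str:
    """Render Whisper's per-segment output as timestamped lines.

    Each entry in result["segments"] includes "start", "end" (in seconds) and "text".
    """
    lines = []
    for segment in result.get("segments", []):
        start, end = segment["start"], segment["end"]
        lines.append(f"[{start:7.2f} -> {end:7.2f}] {segment['text'].strip()}")
    return "\n".join(lines)
```

You could then call `format_segments(result)` inside `transcribe_podcast` and write the encoded output to a second file alongside the plain-text transcript.
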
## Deployment Dockerfiles

The transcribe job needs a custom container image that includes `ffmpeg`, the `ml` dependencies and the NVIDIA drivers for optional GPU acceleration. Add a Dockerfile for it, e.g. `torch.dockerfile`, and reference it as a custom runtime for the batch service in your `nitric.yaml`.

```docker
# The python version must match the version in .python-version
FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim AS builder

ARG HANDLER
ENV HANDLER=${HANDLER}

ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy PYTHONPATH=.
WORKDIR /app
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen -v --no-install-project --extra ml --no-dev --no-python-downloads
COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen -v --no-dev --extra ml --no-python-downloads

# Torch dockerfile
# Used for torch runtime based nitric batch services
# Don't need to include the CUDA runtime as the nvidia pypi dep already ships with it
FROM nvcr.io/nvidia/driver:550-5.15.0-1065-nvidia-ubuntu22.04

ARG HANDLER

ENV HANDLER=${HANDLER}
ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONPATH="."
ENV NVIDIA_DRIVER_CAPABILITIES=all
ENV NVIDIA_REQUIRE_CUDA="cuda>=8.0"

RUN apt-get update -y && \
    apt-get install -y software-properties-common ffmpeg && \
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get update -y && \
    apt-get install -y python3.11 && \
    ln -sf /usr/bin/python3.11 /usr/local/bin/python3.11

# Copy the application from the builder
COPY --from=builder --chown=app:app /app /app
WORKDIR /app

# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"

# Run the service using the path to the handler
ENTRYPOINT python -u $HANDLER
```

We'll also add a matching dockerignore (e.g. `torch.dockerfile.dockerignore`) to keep the build context small:

```text
.mypy_cache/
.nitric/
.venv/
nitric.yaml
README.md
```

### Requesting a G instance quota increase

Most AWS accounts **will not** have access to on-demand GPU instances (G instances) by default. If you'd like to run the model on a GPU, you'll need to request a quota increase for G instances.

If you prefer not to use a GPU, you can set `gpus=0` in the `@transcribe_job` decorator on `transcribe_podcast` in `batches/transcribe.py`.

<Note>
  **Important:** If the `gpus` value in `batches/transcribe.py` exceeds the number
  of GPUs available in your AWS account, the job will never start. If you want
  to run without a GPU, make sure `gpus=0` is set in the decorator on
  `transcribe_podcast`. This is just a quirk of how AWS Batch works.
</Note>

To request a quota increase for G instances in AWS, follow these steps:

1. Go to the [AWS Service Quotas for EC2](https://console.aws.amazon.com/servicequotas/home/services/ec2/quotas) page.
2. Find/search for **All G and VT Spot Instance Requests**.
3. Click **Request quota increase**.
4. Choose an appropriate value, e.g. 4, 8 or 16, depending on your needs.

<img
  src="/docs/images/guides/ai-podcast/part-1/g-instance-quota-increase.png"
  style={{ maxWidth: 500, width: '100%', border: '1px solid #e5e7eb' }}
  alt="screen shot of requesting a G instance quota increase on AWS"
/>

Once you've requested the quota increase it may take time for AWS to approve it.

### Deploy the project

Once the above is complete, we can deploy the project to the cloud using:

```bash
nitric up
```

<Note>
  The initial deployment may take some time due to the size of the Python, NVIDIA
  driver and CUDA runtime dependencies.
</Note>

Once the project is deployed you can try out some transcriptions: upload a podcast through the API (as in the usage sketch above) or add one directly to the bucket, and the bucket notification will trigger the transcription job.

<Note>
Running the project in the cloud will incur costs. Make sure to monitor your usage and shut down the project if you're done with it.

From testing, running this project on a g5.xlarge costs roughly $0.05 per minute of audio transcribed (about $3 for a one-hour episode), based on standard EC2 pricing for US regions.

</Note>