@@ -35,7 +35,7 @@ Next, let's install our base dependencies, then add the `openai-whisper` library
3535# Install the base dependencies
3636uv sync
3737# Add OpenAI whisper dependency
38- uv add openai-whisper --optional ml
38+ uv add openai-whisper librosa numpy --optional ml
3939```
4040
4141<Note>
@@ -110,19 +110,19 @@ submittable_transcribe_job = transcribe_job.allow("submit")
110110
111111@main_api.get("/podcast/:name")
112112async def get_podcast(ctx: HttpContext):
113-     name = ctx.req.params['name']
113+     name = ctx.req.params['name']
114114
115-     download_url = await readable_transcript_bucket.file(f"{name}-transcript.txt").download_url()
115+     download_url = await readable_transcript_bucket.file(f"{name}-transcript.txt").download_url()
116116
117-     ctx.res.headers["Location"] = download_url
118-     ctx.res.status = 303
117+     ctx.res.headers["Location"] = download_url
118+     ctx.res.status = 303
119119
120-     return ctx
120+     return ctx
121121
122122Nitric.run()
123123```
124124
125- We will add a storage listener which will be triggered by files being added to the `podcast_bucket`.
125+ We will then add a route that returns a pre-signed upload URL for the podcast bucket; uploading directly to the bucket through this URL avoids the request size limits of the API Gateway. The route needs write access, so declare `writable_podcast_bucket = podcast_bucket.allow("write")` alongside the other permissions. A sketch of the client-side upload flow follows the route below.
126126
127127```python title:services/api.py
128128# !collapse(1:18) collapsed
@@ -134,39 +134,126 @@ from nitric.context import HttpContext
134134readable_transcript_bucket = transcript_bucket.allow("read")
135135submittable_transcribe_job = transcribe_job.allow("submit")
136136
137+ @main_api.get("/podcast/:name")
138+ async def get_podcast(ctx: HttpContext):
139+     name = ctx.req.params['name']
140+
141+     download_url = await readable_transcript_bucket.file(f"{name}-transcript.txt").download_url()
142+
143+     ctx.res.headers["Location"] = download_url
144+     ctx.res.status = 303
145+
146+     return ctx
147+
148+ @main_api.get("/audio-upload-url/:name")
149+ async def get_audio_upload_url(ctx: HttpContext):
150+     name = ctx.req.params['name']
151+
152+     upload_url = await writable_podcast_bucket.file(name).upload_url()
153+
154+     ctx.res.body = upload_url
155+
156+ Nitric.run()
157+ ```
158+
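To illustrate how a client uses these routes, here's a minimal sketch of the upload flow. It assumes the API is running locally (for example via `nitric start`, with the API reachable at `http://localhost:4001`; check the CLI output for the actual address) and that `my-podcast.mp3` is a local audio file:

```python
import urllib.request

# Ask the API for a pre-signed upload URL for a podcast named "my-podcast".
with urllib.request.urlopen("http://localhost:4001/audio-upload-url/my-podcast") as resp:
    upload_url = resp.read().decode()

# PUT the audio file straight to the bucket via the pre-signed URL,
# bypassing the API gateway and its request size limits.
with open("my-podcast.mp3", "rb") as f:
    request = urllib.request.Request(upload_url, data=f.read(), method="PUT")

with urllib.request.urlopen(request) as resp:
    print(resp.status)
```
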
159+ We will then add a storage listener which will be triggered by files being added to the `podcast_bucket`, submitting a transcription job for each new file.
160+
161+ ```python title:services/api.py
162+ # !collapse(1:26) collapsed
163+ from common.resources import main_api, transcript_bucket, podcast_bucket, transcribe_job
164+ from nitric.application import Nitric
165+ from nitric.resources import BucketNotificationContext
166+ from nitric.context import HttpContext
167+
168+ readable_transcript_bucket = transcript_bucket.allow("read")
169+ submittable_transcribe_job = transcribe_job.allow("submit")
170+
137171@main_api.get("/transcript/:name")
138172async def get_podcast(ctx: HttpContext):
139-     name = ctx.req.params['name']
173+     name = ctx.req.params['name']
140174
141-     download_url = await readable_transcript_bucket.file(f"{name}-transcript.txt").download_url()
175+     download_url = await readable_transcript_bucket.file(f"{name}-transcript.txt").download_url()
142176
143-     ctx.res.headers["Location"] = download_url
144-     ctx.res.status = 303
177+     ctx.res.headers["Location"] = download_url
178+     ctx.res.status = 303
145179
146-     return ctx
180+     return ctx
181+
182+ @main_api.get("/podcast/:name")
183+ async def get_podcast(ctx: HttpContext):
184+     name = ctx.req.params['name']
185+
186+     download_url = await readable_transcript_bucket.file(f"{name}-transcript.txt").download_url()
187+
188+     ctx.res.headers["Location"] = download_url
189+     ctx.res.status = 303
190+
191+     return ctx
147192
148193@podcast_bucket.on("write", "*")
149194async def on_add_podcast(ctx: BucketNotificationContext):
150-     await submittable_transcribe_job.submit({"podcast_name": ctx.req.key})
195+     await submittable_transcribe_job.submit({"podcast_name": ctx.req.key})
151196
152-     return ctx
197+     return ctx
153198
154199Nitric.run()
155200```
156201
202+ ## Downloading our model
203+
204+ We can download the model ahead of time and embed it in our container to reduce the startup time of transcription jobs. We'll create a script which can be run with `uv run download_model.py --model_name turbo`.
205+
206+ ```python title:download_model.py
207+ from whisper import _MODELS, _download
208+ import argparse
209+ import os
210+
211+ default = os.path.join(os.path.expanduser("~"), ".cache")
212+ download_root = os.path.join(os.getenv("XDG_CACHE_HOME", default), "whisper")
213+
214+ def download_whisper_model(model_name="base"):
215+     print("downloading model...")
216+     # download the weights into the default whisper cache, keeping a copy in memory
217+     model = _download(_MODELS[model_name], root=download_root, in_memory=True)
218+
219+     # make sure the ./.model directory exists
220+     os.makedirs("./.model", exist_ok=True)
221+
222+     # write the model to disk
223+     save_path = "./.model/model.pt"
224+     with open(save_path, "wb") as f:
225+         f.write(model)
226+
227+     print(f"Model '{model_name}' has been downloaded and saved to './.model/model.pt'.")
228+
229+ if __name__ == "__main__":
230+     parser = argparse.ArgumentParser(description="Download a Whisper model.")
231+     parser.add_argument("--model_name", type=str, default="base", help="Name of the model to download.")
232+
233+     args = parser.parse_args()
234+
235+     download_whisper_model(model_name=args.model_name)
236+ ```
237+
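To verify the download, the saved weights can be loaded straight from the local checkpoint (a minimal sketch, assuming the `./.model/model.pt` path used by the script above):

```python
import whisper

# Load the locally cached checkpoint rather than downloading it again,
# then print its dimensions to confirm the weights are usable.
model = whisper.load_model("./.model/model.pt")
print(model.dims)
```
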
157238## Add Transcribe Batch Job
158239
159- Start by adding our imports and adding permissions to the resources we defined earlier.
240+ Start by adding our imports and granting permissions on the resources we defined earlier. We'll also read the model's location from an environment variable, defaulting to `./.model/model.pt`.
160241
161242```python title:batches/transcribe.py
162243import whisper
244+ import io
245+ import numpy as np
246+ import os
247+ import librosa
163248from common.resources import transcribe_job, transcript_bucket, podcast_bucket
164249from nitric.context import JobContext
165250from nitric.application import Nitric
166251
167252writeable_transcript_bucket = transcript_bucket.allow("write")
168253readable_podcast_bucket = podcast_bucket.allow("read")
169254
255+ MODEL = os.environ.get("MODEL", "./.model/model.pt")
256+
170257Nitric.run()
171258```
172259
@@ -181,74 +268,96 @@ We'll then create our Job and set the required memory to `12000`. This is a safe
181268| large | 1550 M | N/A | large | `~10 GB` | `1x` |
182269
183270```python title:batches/transcribe.py
184- # !collapse(1:7) collapsed
271+ # !collapse(1:13) collapsed
185272import whisper
273+ import io
274+ import numpy as np
275+ import os
276+ import librosa
186277from common.resources import transcribe_job, transcript_bucket, podcast_bucket
187278from nitric.context import JobContext
188279from nitric.application import Nitric
189280
190281writeable_transcript_bucket = transcript_bucket.allow("write")
191282readable_podcast_bucket = podcast_bucket.allow("read")
192283
284+ MODEL = os.environ.get("MODEL", "./.model/model.pt")
285+
193286@transcribe_job(cpus=1, memory=12000, gpus=0)
194287async def transcribe_podcast(ctx: JobContext):
195288    return ctx
196289
197290Nitric.run()
198291```
199292
200- We'll then read the audio file that is referenced in the `JobContext` data that was sent with the submit request. We'll write the podcast to a local file so that the model can read from it.
293+ We'll then read the audio file referenced in the `JobContext` data that was sent with the submit request. We'll load those bytes with `librosa` as a floating-point time series, resampled to the 16 kHz rate Whisper expects, which gives us a `numpy` array we can pass straight to `whisper`.
201294
202295```python title:batches/transcribe.py
203- # !collapse(1:7) collapsed
296+ # !collapse(1:13) collapsed
204297import whisper
298+ import io
299+ import numpy as np
300+ import os
301+ import librosa
205302from common.resources import transcribe_job, transcript_bucket, podcast_bucket
206303from nitric.context import JobContext
207304from nitric.application import Nitric
208305
209306writeable_transcript_bucket = transcript_bucket.allow("write")
210307readable_podcast_bucket = podcast_bucket.allow("read")
211308
309+ MODEL = os.environ.get("MODEL", "./.model/model.pt")
310+
212311@transcribe_job(cpus=1, memory=12000, gpus=0)
213312async def transcribe_podcast(ctx: JobContext):
214313    podcast_name = ctx.req.data["podcast_name"]
215314    print(f"Transcribing: {podcast_name}")
216315
217316    podcast = await readable_podcast_bucket.file(podcast_name).read()
218317
219-     with open("local-podcast", "wb") as f:
220-         f.write(podcast)
318+     podcast_io = io.BytesIO(podcast)
319+
320+     y, sr = librosa.load(podcast_io, sr=16000)  # resample to the 16 kHz rate Whisper expects
321+     audio_array = np.array(y)
221322
222323    return ctx
223324
224325Nitric.run()
225326```
226327
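The `sr=16000` passed to `librosa.load` above matches the sample rate Whisper's feature extraction assumes. A quick standalone check of the library's constants (a minimal sketch, nothing project-specific) shows the assumption:

```python
import numpy as np
import whisper.audio

# Whisper's mel-spectrogram pipeline is built around 16 kHz audio in 30-second windows.
print(whisper.audio.SAMPLE_RATE)   # 16000
print(whisper.audio.CHUNK_LENGTH)  # 30

# pad_or_trim pads or trims a clip to exactly one 30-second window at that rate.
five_seconds = np.zeros(5 * whisper.audio.SAMPLE_RATE, dtype=np.float32)
print(whisper.audio.pad_or_trim(five_seconds).shape)  # (480000,)
```
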
227- We'll then load our model and transcribe the audio. This is where we can choose the model based on balancing speed, size, and accuracy. We can turn off `FP16` with `fp16=False` which will use `FP32` instead. This will depend on what is supported on your CPU when testing locally, however, `FP16` and `FP32` are supported on Lambda.
328+ We'll then load our model and transcribe the audio. We can turn off `FP16` with `fp16=False`, which will use `FP32` instead. Which one to use depends on what your CPU supports when testing locally; both `FP16` and `FP32` are supported on Lambda.
228329
229330```python title:batches/transcribe.py
230- # !collapse(1:7) collapsed
331+ # !collapse(1:13) collapsed
231332import whisper
333+ import io
334+ import numpy as np
335+ import os
336+ import librosa
232337from common.resources import transcribe_job, transcript_bucket, podcast_bucket
233338from nitric.context import JobContext
234339from nitric.application import Nitric
235340
236341writeable_transcript_bucket = transcript_bucket.allow("write")
237342readable_podcast_bucket = podcast_bucket.allow("read")
238343
344+ MODEL = os.environ.get("MODEL", "./.model/model.pt")
345+
239346@transcribe_job(cpus=1, memory=12000, gpus=0)
240- # !collapse(1:9) collapsed
347+ # !collapse(1:10) collapsed
241348async def transcribe_podcast(ctx: JobContext):
242349    podcast_name = ctx.req.data["podcast_name"]
243350    print(f"Transcribing: {podcast_name}")
244351
245352    podcast = await readable_podcast_bucket.file(podcast_name).read()
246353
247-     with open("local-podcast", "wb") as f:
248-         f.write(podcast)
354+     podcast_io = io.BytesIO(podcast)
355+
356+     y, sr = librosa.load(podcast_io, sr=16000)  # resample to the 16 kHz rate Whisper expects
357+     audio_array = np.array(y)
249358
250-     model = whisper.load_model("turbo")
251-     result = model.transcribe("local-podcast", verbose=True, fp16=False)
359+     model = whisper.load_model(MODEL)
360+     result = model.transcribe(audio_array, verbose=True, fp16=False)
252361
253362    return ctx
254363
@@ -258,35 +367,44 @@ Nitric.run()
258367Finally, we'll take the resulting transcript, which is stored in `result["text"]`, and write it to the transcript bucket.
259368
260369```python title:batches/transcribe.py
261- # !collapse(1:7) collapsed
370+ # !collapse(1:13) collapsed
262371import whisper
372+ import io
373+ import numpy as np
374+ import os
375+ import librosa
263376from common.resources import transcribe_job, transcript_bucket, podcast_bucket
264377from nitric.context import JobContext
265378from nitric.application import Nitric
266379
267380writeable_transcript_bucket = transcript_bucket.allow("write")
268381readable_podcast_bucket = podcast_bucket.allow("read")
269382
383+ MODEL = os.environ.get("MODEL", "./.model/model.pt")
384+
270385@transcribe_job(cpus=1, memory=12000, gpus=0)
271- # !collapse(1:12) collapsed
386+ # !collapse(1:13) collapsed
272387async def transcribe_podcast(ctx: JobContext):
273388    podcast_name = ctx.req.data["podcast_name"]
274389    print(f"Transcribing: {podcast_name}")
275390
276391    podcast = await readable_podcast_bucket.file(podcast_name).read()
277392
278-     with open("local-podcast", "wb") as f:
279-         f.write(podcast)
393+     podcast_io = io.BytesIO(podcast)
280394
281-     model = whisper.load_model("turbo")
282-     result = model.transcribe("local-podcast", verbose=True, fp16=False)
395+     y, sr = librosa.load(podcast_io, sr=16000)  # resample to the 16 kHz rate Whisper expects
396+     audio_array = np.array(y)
283397
284-     transcript = result["text"].encode()
398+     model = whisper.load_model(MODEL)
399+     result = model.transcribe(audio_array, verbose=True, fp16=False)
285400
286-     print("Finished transcribing... Writing to Bucket")
287-     await writeable_transcript_bucket.file(f"{podcast_name}-transcript.txt").write(transcript)
401+     transcript = result["text"].encode()
288402
289-     return ctx
403+     print("Finished transcribing... Writing to Bucket")
404+     await writeable_transcript_bucket.file(f"{podcast_name}-transcript.txt").write(transcript)
405+     print("Done!")
406+
407+     return ctx
290408
291409Nitric.run()
292410```
@@ -403,6 +521,19 @@ We'll add a `dockerignore` to help reduce the size of the Docker Image that is b
403521.mypy_cache/
404522.nitric/
405523.venv/
524+ nitric-spec.json
525+ nitric.yaml
526+ README.md
527+ ```
528+
529+ And add `.model/` to `python.dockerignore`.
530+
531+ ```text title:python.dockerignore
532+ .mypy_cache/
533+ .nitric/
534+ .venv/
535+ .model/
536+ nitric-spec.json
406537nitric.yaml
407538README.md
408539```