@@ -35,7 +35,7 @@ Next, let's install our base dependencies, then add the `openai-whisper` library
3535#  Install the base dependencies
3636uv sync
3737#  Add OpenAI whisper and audio processing dependencies
38- uv add openai-whisper --optional ml
38+ uv add openai-whisper librosa numpy --optional ml
3939``` 
4040
4141<Note>
@@ -110,19 +110,19 @@ submittable_transcribe_job = transcribe_job.allow("submit")
110110
111111@main_api.get (" /podcast/:name"  )
112112async  def  get_podcast (ctx : HttpContext):
113-      name =  ctx.req.params[' name'  ]
113+   name =  ctx.req.params[' name'  ]
114114
115-      download_url =  await  readable_transcript_bucket.file(f " { name} -transcript.txt " ).download_url()
115+   download_url =  await  readable_transcript_bucket.file(f " { name} -transcript.txt " ).download_url()
116116
117-      ctx.res.headers[" Location"  ] =  download_url
118-      ctx.res.status =  303 
117+   ctx.res.headers[" Location"  ] =  download_url
118+   ctx.res.status =  303 
119119
120-      return  ctx
120+   return  ctx
121121
122122Nitric.run()
123123``` 
124124
125- We will add a storage listener which  will be triggered by files being added  to the ` podcast_bucket ` .
125+ We will then add a route that returns an upload URL for the podcast bucket. Uploading directly to the bucket through a pre-signed URL circumvents the request size limits of the API Gateway.
126126
127127``` python  title:services/api.py
128128#  !collapse(1:18) collapsed
@@ -134,39 +134,126 @@ from nitric.context import HttpContext
134134readable_transcript_bucket =  transcript_bucket.allow(" read"  )
135135submittable_transcribe_job =  transcribe_job.allow(" submit"  )
136136
+ writable_podcast_bucket = podcast_bucket.allow("write")
+ 
137+ @main_api.get (" /podcast/:name"  )
138+ async  def  get_podcast (ctx : HttpContext):
139+   name =  ctx.req.params[' name'  ]
140+ 
141+   download_url =  await  readable_transcript_bucket.file(f " { name} -transcript.txt " ).download_url()
142+ 
143+   ctx.res.headers[" Location"  ] =  download_url
144+   ctx.res.status =  303 
145+ 
146+   return  ctx
147+ 
148+ @main_api.get (" /audio-upload-url/:name"  )
149+ async  def  get_audio_upload_url (ctx : HttpContext):
150+   name =  ctx.req.params[' name'  ]
151+ 
152+   upload_url =  await  writable_podcast_bucket.file(name).upload_url()
153+ 
154+   ctx.res.body =  upload_url
155+ 
+   return ctx
+ 
156+ Nitric.run()
157+ ``` 
158+ 
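To use these routes, a client first fetches the upload URL, then sends the audio bytes straight to that URL with an HTTP `PUT`, bypassing the API Gateway's request size limits entirely. Below is a minimal sketch using only the Python standard library; the base URL and file name are placeholders for your own local or deployed endpoint.

```python
import urllib.request

API_BASE = "http://localhost:4001"  # placeholder: your local or deployed API address
PODCAST_NAME = "my-podcast.mp3"     # placeholder: the audio file to transcribe

# Ask our API for a pre-signed upload URL for this podcast name
with urllib.request.urlopen(f"{API_BASE}/audio-upload-url/{PODCAST_NAME}") as resp:
    upload_url = resp.read().decode()

# PUT the raw audio bytes directly to the bucket via the pre-signed URL
with open(PODCAST_NAME, "rb") as audio:
    request = urllib.request.Request(upload_url, data=audio.read(), method="PUT")
    urllib.request.urlopen(request)
```
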
159+ Next, we will add a storage listener that is triggered whenever a file is added to the `podcast_bucket`.
160+ 
161+ ``` python  title:services/api.py
162+ #  !collapse(1:26) collapsed
163+ from  common.resources import  main_api, transcript_bucket, podcast_bucket, transcribe_job
164+ from  nitric.application import  Nitric
165+ from  nitric.resources import  BucketNotificationContext
166+ from  nitric.context import  HttpContext
167+ 
168+ readable_transcript_bucket =  transcript_bucket.allow(" read"  )
169+ submittable_transcribe_job =  transcribe_job.allow(" submit"  )
+ writable_podcast_bucket = podcast_bucket.allow("write")
170+ 
137171@main_api.get (" /transcript/:name"  )
138172async  def  get_podcast (ctx : HttpContext):
139-      name =  ctx.req.params[' name'  ]
173+   name =  ctx.req.params[' name'  ]
140174
141-      download_url =  await  readable_transcript_bucket.file(f " { name} -transcript.txt " ).download_url()
175+   download_url =  await  readable_transcript_bucket.file(f " { name} -transcript.txt " ).download_url()
142176
143-      ctx.res.headers[" Location"  ] =  download_url
144-      ctx.res.status =  303 
177+   ctx.res.headers[" Location"  ] =  download_url
178+   ctx.res.status =  303 
145179
146-     return  ctx
180+   return  ctx
181+ 
182+ @main_api.get (" /audio-upload-url/:name"  )
183+ async  def  get_audio_upload_url (ctx : HttpContext):
184+   name =  ctx.req.params[' name'  ]
185+ 
186+   upload_url =  await  writable_podcast_bucket.file(name).upload_url()
187+ 
188+   ctx.res.body =  upload_url
189+ 
190+   return  ctx
147192
148193@podcast_bucket.on (" write"  , " *"  )
149194async  def  on_add_podcast (ctx : BucketNotificationContext):
150-      await  submittable_transcribe_job.submit({ " podcast_name"  : ctx.req.key })
195+   await  submittable_transcribe_job.submit({ " podcast_name"  : ctx.req.key })
151196
152-      return  ctx
197+   return  ctx
153198
154199Nitric.run()
155200``` 
156201
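Since the `/podcast/:name` route we added earlier replies with a `303` redirect to a pre-signed download URL, any HTTP client that follows redirects will receive the transcript text directly. Here's a minimal sketch of fetching a finished transcript, again using a placeholder base URL:

```python
import urllib.request

API_BASE = "http://localhost:4001"  # placeholder: your local or deployed API address

# urllib follows the 303 redirect to the pre-signed download URL automatically
with urllib.request.urlopen(f"{API_BASE}/podcast/my-podcast.mp3") as resp:
    transcript = resp.read().decode()

print(transcript)
```
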
202+ ## Downloading our model  
203+ 
204+ We can download our model ahead of time and embed it into our container to reduce the startup time of our transcription jobs. We'll create a script that can be run with `uv run download_model.py --model_name turbo`.
205+ 
206+ ``` python  title:download_model.py
207+ from  whisper import  _MODELS , _download
208+ import  argparse
209+ import  os
210+ 
211+ default =  os.path.join(os.path.expanduser(" ~"  ), " .cache"  )
212+ download_root =  os.path.join(os.getenv(" XDG_CACHE_HOME"  , default), " whisper"  )
213+ 
214+ def  download_whisper_model (model_name = " base"  ):
215+   print (" downloading model..."  )
216+   #  download the model, re-using the default whisper cache if it has already been downloaded
217+   model =  _download(_MODELS [model_name], root = download_root, in_memory = True )
218+ 
219+   #  make sure the ./.model directory exists
220+   os.makedirs(" ./.model"  , exist_ok = True )
221+ 
222+   #  write the model to disk
223+   save_path =  f " ./.model/model.pt " 
224+   with  open (save_path, " wb"  ) as  f:
225+     f.write(model)
226+ 
227+   print (f " Model ' { model_name} ' has been downloaded and saved to './.model/model.pt'. " )
228+ 
229+ if  __name__  ==  " __main__"  :
230+   parser =  argparse.ArgumentParser(description = " Download a Whisper model."  )
231+   parser.add_argument(" --model_name"  , type = str , default = " base"  , help = " Name of the model to download."  )
232+ 
233+   args =  parser.parse_args()
234+ 
235+   download_whisper_model(model_name = args.model_name)
236+ ``` 
237+ 
157238## Add Transcribe Batch Job  
158239
159- Start by adding our imports and adding permissions to the resources we defined earlier.
240+ Start by adding our imports and requesting permissions on the resources we defined earlier. We'll also read the model's location from a `MODEL` environment variable, defaulting to `./.model/model.pt`.
160241
161242``` python  title:batches/transcribe.py
162243import  whisper
244+ import  io
245+ import  numpy as  np
246+ import  os
247+ import  librosa
163248from  common.resources import  transcribe_job, transcript_bucket, podcast_bucket
164249from  nitric.context import  JobContext
165250from  nitric.application import  Nitric
166251
167252writeable_transcript_bucket =  transcript_bucket.allow(" write"  )
168253readable_podcast_bucket =  podcast_bucket.allow(" read"  )
169254
255+ MODEL  =  os.environ.get(" MODEL"  , " ./.model/model.pt"  )
256+ 
170257Nitric.run()
171258``` 
172259
@@ -181,74 +268,96 @@ We'll then create our Job and set the required memory to `12000`. This is a safe
181268|  large  |  1550 M     |  N/A                |  large              |  ` ~10 GB `       |  ` 1x `            | 
182269
183270``` python  title:batches/transcribe.py
184- #  !collapse(1:7 ) collapsed
271+ #  !collapse(1:13 ) collapsed
185272import  whisper
273+ import  io
274+ import  numpy as  np
275+ import  os
276+ import  librosa
186277from  common.resources import  transcribe_job, transcript_bucket, podcast_bucket
187278from  nitric.context import  JobContext
188279from  nitric.application import  Nitric
189280
190281writeable_transcript_bucket =  transcript_bucket.allow(" write"  )
191282readable_podcast_bucket =  podcast_bucket.allow(" read"  )
192283
284+ MODEL  =  os.environ.get(" MODEL"  , " ./.model/model.pt"  )
285+ 
193286@transcribe_job (cpus = 1 , memory = 12000 , gpus = 0 )
194287async  def  transcribe_podcast (ctx : JobContext):
195288  return  ctx
196289
197290Nitric.run()
198291``` 
199292
200- We'll then read the audio file that is referenced in the ` JobContext `  data that was sent with the submit request. We'll write the podcast to  a local file  so that the model  can read from it .
293+ We'll then read the audio file referenced in the `JobContext` data that was sent with the submit request. We'll load the bytes as a floating-point time series using `librosa`, then convert it to a `numpy` array for `whisper` to consume. Since `whisper` expects 16 kHz mono audio, we resample as we load.
201294
202295``` python  title:batches/transcribe.py
203- #  !collapse(1:7 ) collapsed
296+ #  !collapse(1:13 ) collapsed
204297import  whisper
298+ import  io
299+ import  numpy as  np
300+ import  os
301+ import  librosa
205302from  common.resources import  transcribe_job, transcript_bucket, podcast_bucket
206303from  nitric.context import  JobContext
207304from  nitric.application import  Nitric
208305
209306writeable_transcript_bucket =  transcript_bucket.allow(" write"  )
210307readable_podcast_bucket =  podcast_bucket.allow(" read"  )
211308
309+ MODEL  =  os.environ.get(" MODEL"  , " ./.model/model.pt"  )
310+ 
212311@transcribe_job (cpus = 1 , memory = 12000 , gpus = 0 )
213312async  def  transcribe_podcast (ctx : JobContext):
214313  podcast_name =  ctx.req.data[" podcast_name"  ]
215314  print (f " Transcribing:  { podcast_name} " )
216315
217316  podcast =  await  readable_podcast_bucket.file(podcast_name).read()
218317
219-   with  open (" local-podcast"  , " wb"  ) as  f:
220-     f.write(podcast)
318+   podcast_io =  io.BytesIO(podcast)
319+ 
320+   y, sr = librosa.load(podcast_io, sr=16000)  # whisper expects 16 kHz mono audio
321+   audio_array =  np.array(y)
221322
222323  return  ctx
223324
224325Nitric.run()
225326``` 
226327
227- We'll then load our model and transcribe the audio. This is where we can choose the model based on balancing speed, size, and accuracy.  We can turn off ` FP16 `  with ` fp16=False `  which will use ` FP32 `  instead. This will depend on what is supported on your CPU when testing locally, however, ` FP16 `  and ` FP32 `  are supported on Lambda.
328+ We'll then load our model and transcribe the audio. We can turn off `FP16` with `fp16=False`, which uses `FP32` instead. Which precision you can use when testing locally depends on your CPU, however both `FP16` and `FP32` are supported on Lambda.
228329
229330``` python  title:batches/transcribe.py
230- #  !collapse(1:7 ) collapsed
331+ #  !collapse(1:13 ) collapsed
231332import  whisper
333+ import  io
334+ import  numpy as  np
335+ import  os
336+ import  librosa
232337from  common.resources import  transcribe_job, transcript_bucket, podcast_bucket
233338from  nitric.context import  JobContext
234339from  nitric.application import  Nitric
235340
236341writeable_transcript_bucket =  transcript_bucket.allow(" write"  )
237342readable_podcast_bucket =  podcast_bucket.allow(" read"  )
238343
344+ MODEL  =  os.environ.get(" MODEL"  , " ./.model/model.pt"  )
345+ 
239346@transcribe_job (cpus = 1 , memory = 12000 , gpus = 0 )
240- #  !collapse(1:9 ) collapsed
347+ #  !collapse(1:10 ) collapsed
241348async  def  transcribe_podcast (ctx : JobContext):
242349  podcast_name =  ctx.req.data[" podcast_name"  ]
243350  print (f " Transcribing:  { podcast_name} " )
244351
245352  podcast =  await  readable_podcast_bucket.file(podcast_name).read()
246353
247-   with  open (" local-podcast"  , " wb"  ) as  f:
248-     f.write(podcast)
354+   podcast_io =  io.BytesIO(podcast)
355+ 
356+   y, sr = librosa.load(podcast_io, sr=16000)  # whisper expects 16 kHz mono audio
357+   audio_array =  np.array(y)
249358
250-   model =  whisper.load_model(" turbo "  )
251-   result =  model.transcribe(" local-podcast "  , verbose = True , fp16 = False )
359+   model =  whisper.load_model(MODEL )
360+   result =  model.transcribe(audio_array , verbose = True , fp16 = False )
252361
253362  return  ctx
254363
@@ -258,35 +367,44 @@ Nitric.run()
258367Finally, we'll take the resulting transcript, which is stored in `result["text"]`, and write it to the transcript bucket.
259368
260369``` python  title:batches/transcribe.py
261- #  !collapse(1:7 ) collapsed
370+ #  !collapse(1:13 ) collapsed
262371import  whisper
372+ import  io
373+ import  numpy as  np
374+ import  os
375+ import  librosa
263376from  common.resources import  transcribe_job, transcript_bucket, podcast_bucket
264377from  nitric.context import  JobContext
265378from  nitric.application import  Nitric
266379
267380writeable_transcript_bucket =  transcript_bucket.allow(" write"  )
268381readable_podcast_bucket =  podcast_bucket.allow(" read"  )
269382
383+ MODEL  =  os.environ.get(" MODEL"  , " ./.model/model.pt"  )
384+ 
270385@transcribe_job (cpus = 1 , memory = 12000 , gpus = 0 )
271- #  !collapse(1:12 ) collapsed
386+ #  !collapse(1:13 ) collapsed
272387async  def  transcribe_podcast (ctx : JobContext):
273388  podcast_name =  ctx.req.data[" podcast_name"  ]
274389  print (f " Transcribing:  { podcast_name} " )
275390
276391  podcast =  await  readable_podcast_bucket.file(podcast_name).read()
277392
278-   with  open (" local-podcast"  , " wb"  ) as  f:
279-     f.write(podcast)
393+   podcast_io =  io.BytesIO(podcast)
280394
281-   model  =  whisper.load_model( " turbo "  )
282-   result  =  model.transcribe( " local-podcast " ,  verbose = True ,  fp16 = False )
395+   y, sr = librosa.load(podcast_io, sr=16000)  # whisper expects 16 kHz mono audio
396+   audio_array  =  np.array(y )
283397
284-     transcript =  result[" text"  ].encode()
398+   model =  whisper.load_model(MODEL )
399+   result =  model.transcribe(audio_array, verbose = True , fp16 = False )
285400
286-     print (" Finished transcoding... Writing to Bucket"  )
287-     await  writeable_transcript_bucket.file(f " { podcast_name} -transcript.txt " ).write(transcript)
401+   transcript =  result[" text"  ].encode()
288402
289-     return  ctx
403+   print (" Finished transcribing... Writing to Bucket"  )
404+   await  writeable_transcript_bucket.file(f " { podcast_name} -transcript.txt " ).write(transcript)
405+   print (" Done!"  )
406+ 
407+   return  ctx
290408
291409Nitric.run()
292410``` 
@@ -403,6 +521,19 @@ We'll add a `dockerignore` to help reduce the size of the Docker Image that is b
403521.mypy_cache/ 
404522.nitric/ 
405523.venv/ 
524+ nitric-spec.json 
525+ nitric.yaml 
526+ README.md 
527+ ``` 
528+ 
529+ And add `.model/` to the python dockerignore so the downloaded model isn't copied into the service images.
530+ 
531+ ``` text title:python.dockerignore
532+ .mypy_cache/ 
533+ .nitric/ 
534+ .venv/ 
535+ .model/ 
536+ nitric-spec.json 
406537nitric.yaml 
407538README.md 
408539``` 