@@ -48,6 +48,8 @@ Next, let's install our base dependencies, then add the extra dependencies we ne
 ```bash
 # Install the base dependencies
 uv sync
+# Add the Hugging Face Hub dependency
+uv add huggingface_hub
 # Add the extra optional ml dependencies
 uv add torch transformers scipy --optional ml
 ```
@@ -75,13 +77,19 @@ from nitric.resources import api, bucket, job
 main_api = api("main")
 # A job for generating our audio content
 gen_audio_job = job("audio")
-# A job to pre-download models
-download_audio_model_job = job("download-audio-model")
 
 # A bucket for storing output audio clips
 clips_bucket = bucket("clips")
 # And another bucket for storing our models
 models_bucket = bucket("models")
+
+# Many cloud API Gateways impose hard response time limits on synchronous requests.
+# To avoid these limits, we can use a Pub/Sub topic to trigger asynchronous processing.
+download_audio_model_topic = topic("download-audio-model")
+
+model_dir = os.path.join(tempfile.gettempdir(), "ai-podcast", ".model")
+cache_dir = os.path.join(tempfile.gettempdir(), "ai-podcast", ".cache")
+zip_path = os.path.join(tempfile.gettempdir(), "ai-podcast", "model.zip")
 ```
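Note that the added lines use `topic`, `os`, and `tempfile`, but this hunk doesn't show the corresponding imports changing. Presumably the top of `common/resources.py` ends up looking something like this (a sketch; the exact import lines aren't shown in the diff):

```python title:common/resources.py
# Assumed imports for the additions above (not visible in this hunk)
import os
import tempfile

from nitric.resources import api, bucket, job, topic
```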
 
 We'll also need an `__init__.py` file in the `common` directory to make it a package.
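The file itself can stay empty; its presence is what makes the directory importable:

```python title:common/__init__.py
# Intentionally empty. Its presence marks `common` as a Python package,
# so `from common.resources import ...` works from services and batches.
```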
@@ -297,64 +305,92 @@ Feel free to play around with it a bit more before continuing on. It can be fun
 
 Before we can deploy our project to the cloud we need to make a few changes. First, we want to be able to cache models to be used between runs without having to pull them from Hugging Face each time.
 
-This is why we added the models bucket and download job initially. It's time to use them. Let's add the download job to `batches/podcast.py`.
+This is why we added the models bucket and download topic initially. It's time to use them. Let's add the download topic subscriber and API endpoint to `services/api.py`.
 
-```python title:batches/podcast.py
-from common.resources import gen_audio_job, clips_bucket, models_bucket, download_audio_model_job
-from nitric.context import JobContext
+```python title:services/api.py
+from common.resources import main_api, model_dir, cache_dir, zip_path, gen_audio_job, download_audio_model_topic, models_bucket
 from nitric.application import Nitric
-from transformers import AutoProcessor, BarkModel
-
-import scipy
-import io
-import torch
-import numpy as np
-import requests
-import zipfile
+from nitric.context import HttpContext, MessageContext
+from huggingface_hub import snapshot_download
 import os
+import zipfile
+import requests
 
-# Allow the batch service to read and write to the models bucket and write to the clips bucket
-clips = clips_bucket.allow('write')
-models = models_bucket.allow('read', 'write')
-
-model_dir = "./.model"
-# Download the model and save it to a nitric bucket
-@download_audio_model_job(cpus=4, memory=12000)
-async def do_download_audio_model(ctx: JobContext):
-    model_id = ctx.req.data["model_id"]
-
-    print("Downloading models - this may take several minutes without much feedback, please be patient")
-    processor = AutoProcessor.from_pretrained(model_id)
-    model = BarkModel.from_pretrained(model_id)
+models = models_bucket.allow('write')
+generate_audio = gen_audio_job.allow('submit')
+download_audio_model = download_audio_model_topic.allow("publish")
 
-    processor.save_pretrained(f"{model_dir}/processor")
-    model.save_pretrained(f"{model_dir}/audio")
+audio_model_id = "suno/bark"
+default_voice_preset = "v2/en_speaker_6"
 
+@download_audio_model_topic.subscribe()
+async def do_download_audio_model(ctx: MessageContext):
+    model_id: str = ctx.req.data["model_id"]
+
+    print(f"Downloading model to {model_dir}")
+    dir = snapshot_download(model_id, local_dir=model_dir, cache_dir=cache_dir, allow_patterns=[
+        "config.json",
+        "generation_config.json",
+        "pytorch_model.bin",
+        "speaker_embeddings_path.json",
+        "special_tokens_map.json",
+        "tokenizer.json",
+        "tokenizer_config.json",
+        "vocab.txt"
+    ])
+
+    print(f"Downloaded model to {dir}")
+
+    # zip the model and upload it to the bucket
     print("Compressing models")
-    zip_path = "model.zip"
 
     # zip the model
     with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_STORED) as zip_file:
-        for root, dirs, files in os.walk(model_dir):
+        for root, dirs, files in os.walk(dir):
             for file in files:
                 file_path = os.path.join(root, file)
-                archive_name = os.path.relpath(file_path, start=model_dir)
+                archive_name = os.path.relpath(file_path, start=dir)
                 print(f"Adding {file_path} to zip as {archive_name}")
                 zip_file.write(file_path, archive_name)
 
-    print("Storing models in bucket")
-    # push the archive
+    # upload the model to the bucket
     module_url = await models.file(f"{model_id}.zip").upload_url()
-    print(module_url)
+
     with open(zip_path, "rb") as f:
         requests.put(module_url, data=f, timeout=6000)
-    print("Done!")
 
+    os.remove(zip_path)
+
+    print("Successfully cached model in bucket")
+
+@main_api.post("/download-model")
+async def download_audio(ctx: HttpContext):
+    model_id = ctx.req.query.get("model", audio_model_id)
+    # asynchronously download the model
+    await download_audio_model.publish({"model_id": model_id})
 ```
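Once the app is running locally, a quick way to exercise the new endpoint is a small script like the one below. The base URL is an assumption (use whatever address the Nitric CLI reports), and the `/audio/:filename` endpoint is the one defined earlier in the guide:

```python
# Hypothetical smoke test for the model download flow.
# Assumes the app is running locally and the API is reachable at BASE_URL.
import requests

BASE_URL = "http://localhost:4001"  # adjust to the address the Nitric CLI prints

# Publish a download request for the default model (suno/bark)
requests.post(f"{BASE_URL}/download-model")

# Later, once the model has been cached in the bucket, request a clip
requests.post(
    f"{BASE_URL}/audio/hello",
    params={"preset": "v2/en_speaker_6"},
    data="Welcome to the AI podcast!",
)
```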
 
 We'll also update our audio generation job to download the model from the bucket before processing the audio.
 
 ```python title:batches/podcast.py
+from common.resources import model_dir, zip_path, gen_audio_job, clips_bucket, models_bucket
+from nitric.context import JobContext
+from nitric.application import Nitric
+from transformers import AutoProcessor, BarkModel
+
+import scipy
+import io
+import torch
+import numpy as np
+import requests
+import zipfile
+import os
+
+clips = clips_bucket.allow("write")
+models = models_bucket.allow("read")
+
+# This defines the Job Handler that will process all audio generation jobs
+# using the job definition we created in the resources module
 @gen_audio_job(cpus=4, memory=12000, gpus=1)
 async def do_generate_audio(ctx: JobContext):
     file = ctx.req.data["file"]
@@ -367,21 +403,23 @@ async def do_generate_audio(ctx: JobContext):
     print("Downloading model")
     download_url = await models.file(f"{model_id}.zip").download_url()
     response = requests.get(download_url, allow_redirects=True, timeout=600)
+
+    # make sure the directory containing zip_path exists
+    os.makedirs(os.path.dirname(zip_path), exist_ok=True)
     # save the zip file
-    with open("model.zip", "wb") as f:
+    with open(zip_path, "wb") as f:
         f.write(response.content)
     print("Unzipping model")
-    with zipfile.ZipFile("model.zip", 'r') as zip_ref:
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         zip_ref.extractall(model_dir)
 
     # cleanup zip file
     print("Cleaning up")
-    os.remove("model.zip")
-
+    os.remove(zip_path)
 
     print("Loading model")
-    model = BarkModel.from_pretrained(f"{model_dir}/audio")
-    processor = AutoProcessor.from_pretrained("./.model/processor")
+    model = BarkModel.from_pretrained(f"{model_dir}")
+    processor = AutoProcessor.from_pretrained(f"{model_dir}")
     print("Model loaded")
 
     print(f'Using voice preset {voice_preset}')
@@ -435,61 +473,6 @@ Nitric.run()
   where you're also paying for a GPU.
 </Note>
 
-Then we can add an API endpoint to trigger the download job and update the API endpoint to allow selection of models and voice presets.
-
-```python title:services/api.py
-from common.resources import main_api, gen_audio_job, download_audio_model_job, models_bucket
-from nitric.application import Nitric
-from nitric.context import HttpContext
-
-generate_audio = gen_audio_job.allow('submit')
-download_audio_model = download_audio_model_job.allow('submit')
-models = models_bucket.allow("read")
-
-audio_model_id = "suno/bark"
-default_voice_preset = "v2/en_speaker_6"
-
-@main_api.post("/download-audio-model")
-async def download_audio(ctx: HttpContext):
-    model_id = ctx.req.query.get("model", audio_model_id)
-
-    if isinstance(model_id, list):
-        model_id = model_id[0]
-
-    await download_audio_model.submit({"model_id": model_id})
-
-# Generate a sample voice line
-@main_api.post("/audio/:filename")
-async def submit_auto(ctx: HttpContext):
-    name = ctx.req.params["filename"]
-    model_id = ctx.req.query.get("model", audio_model_id)
-    preset = ctx.req.query.get("preset", default_voice_preset)
-
-    if isinstance(model_id, list):
-        model_id = model_id[0]
-
-    model_downloaded = await models.exists(f"{model_id}.zip")
-    if not model_downloaded:
-        ctx.res.status = 404
-        ctx.res.body = f'model \'{model_id}\' hasn\'t been downloaded yet, call POST: /download-audio-model to pre-download the model'
-        return
-
-    if isinstance(preset, list):
-        preset = preset[0]
-
-    body = ctx.req.data
-    if body is None:
-        ctx.res.status = 400
-        return
-
-    print(f"using preset {preset}")
-
-    await generate_audio.submit({"file": name, "model_id": model_id, "text": body.decode('utf-8'), "preset": preset})
-
-
-Nitric.run()
-```
-
 Once that's done we can give the project another test, just to make sure everything is still working as expected.
 
 If nitric isn't still running you can start it again with:
@@ -647,15 +630,15 @@ batch-compute-env:
     - g5 # Or use another instance type that supports GPUs
     - optimal
   # Customize the ECS launch template for this compute environment
-  launch-template:
-    # Increase the default docker system volume size
-    # This is so large models can be downloaded and cached
-    block-device-mappings:
-      - device-name: /dev/xvda
-        ebs:
-          volume-size: 50
-          volume-type: gp2
-          delete-on-termination: 'true'
+
+config:
+  # How functions without a type will be deployed
+  default:
+    lambda: # Available since v0.26.0
+      # increase ephemeral storage size to allow for largish model files
+      ephemeral-storage: 10000
+      memory: 1024
+      timeout: 900
 ```
660643
661644<Note>