8888# In addition to pointing the Hugging Face Hub at the path
8989# where we mount the Volume, we also
9090# [turn on "high performance" downloads](https://huggingface.co/docs/hub/en/models-downloading#faster-downloads),
91- # which can fully saturate our network bandwidth.
91+ # which can fully saturate our network bandwidth,
92+ # and provide an `HF_TOKEN` via a [Modal Secret](https://modal.com/docs/guide/secrets)
93+ # so that our downloads aren't throttled.
94+ # You'll need to create a Secret named `huggingface-secret`
95+ # with your token [here](https://modal.com/apps/secrets).
96+
97+ hf_secret = modal.Secret.from_name("huggingface-secret")
9298
9399# ### Caching compilation artifacts
94100
@@ -265,7 +271,7 @@ def wake_up():
265271# With all this in place, we are ready to define our high-performance, low-latency
266272# LFM 2 inference server.
267273
268- app = modal.App("examples-lfm-snapshot")
274+ app = modal.App("example-lfm-snapshot")
269275
270276
271277@app.cls(
@@ -277,7 +283,7 @@ def wake_up():
277283 "/root/.cache/huggingface": hf_cache_vol,
278284 "/root/.cache/vllm": vllm_cache_vol,
279285 },
280- secrets=[modal.Secret.from_name("huggingface-secret-liquid")],
286+ secrets=[hf_secret],
281287 enable_memory_snapshot=True,
282288 experimental_options={"enable_gpu_snapshot": True},
283289 region=REGION,
@@ -345,10 +351,10 @@ def stop(self):
345351# ## Interact with the server
346352
347353# Once it is deployed, you'll see a URL appear in the command line,
348- # something like `https://your-workspace-name--examples-lfm-snapshot-lfmvllminference.us-east.modal.direct`.
354+ # something like `https://your-workspace-name--example-lfm-snapshot-lfmvllminference.us-east.modal.direct`.
349355
350356# You can find [interactive Swagger UI docs](https://swagger.io/tools/swagger-ui/)
351- # at the `/docs` route of that URL, i.e. `https://your-workspace-name--examples-lfm-snapshot-lfmvllminference.us-east.modal.direct/docs`.
357+ # at the `/docs` route of that URL, i.e. `https://your-workspace-name--example-lfm-snapshot-lfmvllminference.us-east.modal.direct/docs`.
352358# These docs describe each route and indicate the expected input and output
353359# and translate requests into `curl` commands.
354360# For simple routes, you can even send a request directly from the docs page.
@@ -504,7 +510,7 @@ async def _send_request_streaming(
504510# ```
505511
506512if __name__ == "__main__":
507- LfmVllmInference = modal.Cls.from_name("examples-lfm-snapshot", "LfmVllmInference")
513+ LfmVllmInference = modal.Cls.from_name("example-lfm-snapshot", "LfmVllmInference")
508514
509515 async def main():
510516 url = LfmVllmInference._experimental_get_flash_urls()[0]
0 commit comments