Skip to content

Commit 0304ede

Browse files
authored
document HF Secret, update name (#1508)
* document HF Secret, update name * remove hallucinated kwarg * update name of example * fix URLs
1 parent e7905cf commit 0304ede

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

06_gpu_and_ml/llm-serving/lfm_snapshot.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,13 @@
8888
# In addition to pointing the Hugging Face Hub at the path
8989
# where we mount the Volume, we also
9090
# [turn on "high performance" downloads](https://huggingface.co/docs/hub/en/models-downloading#faster-downloads),
91-
# which can fully saturate our network bandwidth.
91+
# which can fully saturate our network bandwidth,
92+
# and we provide an `HF_TOKEN` via a [Modal Secret](https://modal.com/docs/guide/secrets)
93+
# so that our downloads aren't throttled.
94+
# You'll need to create a Secret named `huggingface-secret`
95+
# containing your Hugging Face token [here](https://modal.com/apps/secrets).
96+
97+
hf_secret = modal.Secret.from_name("huggingface-secret")
9298

9399
# ### Caching compilation artifacts
94100

@@ -265,7 +271,7 @@ def wake_up():
265271
# With all this in place, we are ready to define our high-performance, low-latency
266272
# LFM 2 inference server.
267273

268-
app = modal.App("examples-lfm-snapshot")
274+
app = modal.App("example-lfm-snapshot")
269275

270276

271277
@app.cls(
@@ -277,7 +283,7 @@ def wake_up():
277283
"/root/.cache/huggingface": hf_cache_vol,
278284
"/root/.cache/vllm": vllm_cache_vol,
279285
},
280-
secrets=[modal.Secret.from_name("huggingface-secret-liquid")],
286+
secrets=[hf_secret],
281287
enable_memory_snapshot=True,
282288
experimental_options={"enable_gpu_snapshot": True},
283289
region=REGION,
@@ -345,10 +351,10 @@ def stop(self):
345351
# ## Interact with the server
346352

347353
# Once it is deployed, you'll see a URL appear in the command line,
348-
# something like `https://your-workspace-name--examples-lfm-snapshot-lfmvllminference.us-east.modal.direct`.
354+
# something like `https://your-workspace-name--example-lfm-snapshot-lfmvllminference.us-east.modal.direct`.
349355

350356
# You can find [interactive Swagger UI docs](https://swagger.io/tools/swagger-ui/)
351-
# at the `/docs` route of that URL, i.e. `https://your-workspace-name--examples-lfm-snapshot-lfmvllminference.us-east.modal.direct/docs`.
357+
# at the `/docs` route of that URL, i.e. `https://your-workspace-name--example-lfm-snapshot-lfmvllminference.us-east.modal.direct/docs`.
352358
# These docs describe each route and indicate the expected input and output
353359
# and translate requests into `curl` commands.
354360
# For simple routes, you can even send a request directly from the docs page.
@@ -504,7 +510,7 @@ async def _send_request_streaming(
504510
# ```
505511

506512
if __name__ == "__main__":
507-
LfmVllmInference = modal.Cls.from_name("examples-lfm-snapshot", "LfmVllmInference")
513+
LfmVllmInference = modal.Cls.from_name("example-lfm-snapshot", "LfmVllmInference")
508514

509515
async def main():
510516
url = LfmVllmInference._experimental_get_flash_urls()[0]

0 commit comments

Comments
 (0)