From c5036a35ca8da9ba86da4113bbcf9d00b4114131 Mon Sep 17 00:00:00 2001 From: pagezyhf Date: Fri, 18 Oct 2024 15:11:47 +0200 Subject: [PATCH 1/2] Fix model_name raising an error because of the "." in the name --- docs/sagemaker/inference.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sagemaker/inference.md b/docs/sagemaker/inference.md index 920a922ee..29827d149 100644 --- a/docs/sagemaker/inference.md +++ b/docs/sagemaker/inference.md @@ -358,12 +358,12 @@ You should also define `SM_NUM_GPUS`, which specifies the tensor parallelism deg Note that you can optionally reduce the memory and computational footprint of the model by setting the `HF_MODEL_QUANTIZE` environment variable to `true`, but this lower weight precision could affect the quality of the output for some models. ```python -model_name = "llama-3.1-8b-instruct" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()) +model_name = "llama-3-1-8b-instruct" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()) hub = { - 'HF_MODEL_ID':'EleutherAI/gpt-neox-20b', + 'HF_MODEL_ID':'meta-llama/Llama-3.1-8B-Instruct', 'SM_NUM_GPUS':'1', - 'HUGGING_FACE_HUB_TOKEN': '' + 'HUGGING_FACE_HUB_TOKEN': '', } assert hub['HUGGING_FACE_HUB_TOKEN'] != '', "You have to provide a token." 
From 708ff322d7b4718352db6a96fe1f577fe093acc3 Mon Sep 17 00:00:00 2001 From: pagezyhf <165770107+pagezyhf@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:25:41 +0200 Subject: [PATCH 2/2] Update docs/sagemaker/inference.md Co-authored-by: Julien Chaumond --- docs/sagemaker/inference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sagemaker/inference.md b/docs/sagemaker/inference.md index 29827d149..ae440e304 100644 --- a/docs/sagemaker/inference.md +++ b/docs/sagemaker/inference.md @@ -363,7 +363,7 @@ model_name = "llama-3-1-8b-instruct" + time.strftime("%Y-%m-%d-%H-%M-%S", time.g hub = { 'HF_MODEL_ID':'meta-llama/Llama-3.1-8B-Instruct', 'SM_NUM_GPUS':'1', - 'HUGGING_FACE_HUB_TOKEN': '', + 'HUGGING_FACE_HUB_TOKEN': '', } assert hub['HUGGING_FACE_HUB_TOKEN'] != '', "You have to provide a token."