From c5036a35ca8da9ba86da4113bbcf9d00b4114131 Mon Sep 17 00:00:00 2001 From: pagezyhf Date: Fri, 18 Oct 2024 15:11:47 +0200 Subject: [PATCH 1/2] Fix model_name raising an error because of the "." in the name --- docs/sagemaker/inference.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sagemaker/inference.md b/docs/sagemaker/inference.md index 920a922ee..29827d149 100644 --- a/docs/sagemaker/inference.md +++ b/docs/sagemaker/inference.md @@ -358,12 +358,12 @@ You should also define `SM_NUM_GPUS`, which specifies the tensor parallelism deg Note that you can optionally reduce the memory and computational footprint of the model by setting the `HF_MODEL_QUANTIZE` environment variable to `true`, but this lower weight precision could affect the quality of the output for some models. ```python -model_name = "llama-3.1-8b-instruct" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()) +model_name = "llama-3-1-8b-instruct" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()) hub = { - 'HF_MODEL_ID':'EleutherAI/gpt-neox-20b', + 'HF_MODEL_ID':'meta-llama/Llama-3.1-8B-Instruct', 'SM_NUM_GPUS':'1', - 'HUGGING_FACE_HUB_TOKEN': '' + 'HUGGING_FACE_HUB_TOKEN': '', } assert hub['HUGGING_FACE_HUB_TOKEN'] != '', "You have to provide a token." 
From 708ff322d7b4718352db6a96fe1f577fe093acc3 Mon Sep 17 00:00:00 2001 From: pagezyhf <165770107+pagezyhf@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:25:41 +0200 Subject: [PATCH 2/2] Update docs/sagemaker/inference.md Co-authored-by: Julien Chaumond --- docs/sagemaker/inference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sagemaker/inference.md b/docs/sagemaker/inference.md index 29827d149..ae440e304 100644 --- a/docs/sagemaker/inference.md +++ b/docs/sagemaker/inference.md @@ -363,7 +363,7 @@ model_name = "llama-3-1-8b-instruct" + time.strftime("%Y-%m-%d-%H-%M-%S", time.g hub = { 'HF_MODEL_ID':'meta-llama/Llama-3.1-8B-Instruct', 'SM_NUM_GPUS':'1', - 'HUGGING_FACE_HUB_TOKEN': '', + 'HUGGING_FACE_HUB_TOKEN': '', } assert hub['HUGGING_FACE_HUB_TOKEN'] != '', "You have to provide a token."