Merge branch 'aws:main' into main-docs-update

rsareddy0329 · web-flow · commit ecaf9c3b6c3e · 2025-07-23T14:57:17.000-07:00
diff --git a/examples/inference/CLI/inference-fsx-model-e2e-cli.ipynb b/examples/inference/CLI/inference-fsx-model-e2e-cli.ipynb
@@ -35,7 +35,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"
+    "!hyp set-cluster-context --cluster-name <cluster-name>"
    ]
   },
   {
@@ -47,24 +47,19 @@
    "source": [
     "!hyp create hyp-custom-endpoint \\\n",
     "  --version 1.0 \\\n",
-    "  --env \\\n",
-    "    '{\"HF_MODEL_ID\":\"/opt/ml/model\", \\\n",
-    "    \"SAGEMAKER_PROGRAM\":\"inference.py\", \\\n",
-    "    \"SAGEMAKER_SUBMIT_DIRECTORY\":\"/opt/ml/model/code\", \\\n",
-    "    \"MODEL_CACHE_ROOT\":\"/opt/ml/model\", \\\n",
-    "    \"SAGEMAKER_ENV\":\"1\"}' \\\n",
+    "  --env '{ \"key1\": \"val1\", \"key2\": \"val2\"}' \\\n",
     "  --model-source-type fsx \\\n",
-    "  --model-location deepseek-1-5b \\\n",
-    "  --fsx-file-system-id fs-0e6a92495c35a81f2 \\\n",
-    "  --image-uri 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0 \\\n",
+    "  --model-location <model-location-folder> \\\n",
+    "  --fsx-file-system-id <fsx-file-system-id> \\\n",
+    "  --image-uri <image-uri> \\\n",
     "  --model-volume-mount-name model-weights \\\n",
     "  --container-port 8080 \\\n",
     "  --resources-requests '{\"cpu\": \"4\", \"nvidia.com/gpu\": 1, \"memory\": \"32Gi\"}' \\\n",
     "  --resources-limits '{\"nvidia.com/gpu\": 1}' \\\n",
-    "  --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2 \\\n",
-    "  --instance-type ml.g5.8xlarge \\\n",
-    "  --endpoint-name endpoint-fsx-test-cli \\\n",
-    "  --model-name deepseek15b-fsx-test-cli"
+    "  --tls-certificate-output-s3-uri s3://sample-bucket \\\n",
+    "  --instance-type <instance-type> \\\n",
+    "  --endpoint-name endpoint-fsx \\\n",
+    "  --model-name <model-name>"
    ]
   },
   {
@@ -84,7 +79,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp describe hyp-custom-endpoint --name endpoint-fsx-test-cli"
+    "!hyp describe hyp-custom-endpoint --name endpoint-fsx"
    ]
   },
   {
@@ -94,7 +89,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-fsx-test-cli  --body '{\"inputs\":\"What is the capital of USA?\"}'"
+    "!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-fsx  --body '{\"inputs\":\"What is the capital of USA?\"}'"
    ]
   },
   {
@@ -104,7 +99,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp delete hyp-custom-endpoint --name endpoint-fsx-test-cli"
+    "!hyp delete hyp-custom-endpoint --name endpoint-fsx"
    ]
   },
   {
diff --git a/examples/inference/CLI/inference-jumpstart-e2e-cli.ipynb b/examples/inference/CLI/inference-jumpstart-e2e-cli.ipynb
@@ -1,10 +1,10 @@
 {
  "cells": [
   {
-   "metadata": {},
    "cell_type": "markdown",
-   "source": "",
-   "id": "f28ecfc84cef3505"
+   "id": "f28ecfc84cef3505",
+   "metadata": {},
+   "source": []
   },
   {
    "cell_type": "markdown",
@@ -41,7 +41,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"
+    "!hyp set-cluster-context --cluster-name <cluster-name>"
    ]
   },
   {
@@ -53,11 +53,9 @@
    "source": [
     "!hyp create hyp-jumpstart-endpoint \\\n",
     "  --version 1.0 \\\n",
-    "  --model-id deepseek-llm-r1-distill-qwen-1-5b \\\n",
-    "  --model-version 2.0.4 \\\n",
-    "  --instance-type ml.g5.8xlarge \\\n",
-    "  --endpoint-name endpoint-js-test-cli \\\n",
-    "  --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2"
+    "  --model-id <model-id> \\\n",
+    "  --instance-type <instance-type> \\\n",
+    "  --endpoint-name endpoint-js"
    ]
   },
   {
@@ -77,7 +75,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp describe hyp-jumpstart-endpoint --name endpoint-js-test-cli"
+    "!hyp describe hyp-jumpstart-endpoint --name endpoint-js"
    ]
   },
   {
@@ -87,7 +85,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp invoke hyp-jumpstart-endpoint --endpoint-name endpoint-js-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'"
+    "!hyp invoke hyp-jumpstart-endpoint --endpoint-name endpoint-js --body '{\"inputs\":\"What is the capital of USA?\"}'"
    ]
   },
   {
@@ -97,7 +95,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp delete hyp-jumpstart-endpoint --name endpoint-js-test-cli"
+    "!hyp delete hyp-jumpstart-endpoint --name endpoint-js"
    ]
   },
   {
diff --git a/examples/inference/CLI/inference-s3-model-e2e-cli.ipynb b/examples/inference/CLI/inference-s3-model-e2e-cli.ipynb
@@ -35,7 +35,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"
+    "!hyp set-cluster-context --cluster-name <cluster-name>"
    ]
   },
   {
@@ -47,38 +47,31 @@
    "source": [
     "!hyp create hyp-custom-endpoint \\\n",
     "  --version 1.0 \\\n",
-    "  --env \\\n",
-    "    '{ \\\n",
-    "      \"HF_MODEL_ID\": \"/opt/ml/model\", \\\n",
-    "      \"SAGEMAKER_PROGRAM\": \"inference.py\", \\\n",
-    "      \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code\", \\\n",
-    "      \"MODEL_CACHE_ROOT\": \"/opt/ml/model\", \\\n",
-    "      \"SAGEMAKER_ENV\": \"1\" \\\n",
-    "    }' \\\n",
+    "  --env '{ \"key1\": \"val1\", \"key2\": \"val2\"}' \\\n",
     "  --metric-collection-period 30 \\\n",
     "  --metric-name Invocations \\\n",
     "  --metric-stat Sum \\\n",
     "  --metric-type Average \\\n",
     "  --min-value 0.0 \\\n",
-    "  --cloud-watch-trigger-name SageMaker-Invocations-new \\\n",
+    "  --cloud-watch-trigger-name SageMaker-Invocations \\\n",
     "  --cloud-watch-trigger-namespace AWS/SageMaker \\\n",
     "  --target-value 10 \\\n",
     "  --use-cached-metrics true \\\n",
     "  --model-source-type s3 \\\n",
-    "  --model-location deepseek15b \\\n",
-    "  --s3-bucket-name test-model-s3-zhaoqi \\\n",
-    "  --s3-region us-east-2 \\\n",
-    "  --image-uri 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0 \\\n",
+    "  --model-location <model-location-folder> \\\n",
+    "  --s3-bucket-name <bucket-name> \\\n",
+    "  --s3-region <bucket-region> \\\n",
+    "  --image-uri <image-uri> \\\n",
     "  --model-volume-mount-name model-weights \\\n",
     "  --container-port 8080 \\\n",
     "  --resources-requests '{\"cpu\": \"30000m\", \"nvidia.com/gpu\": 1, \"memory\": \"100Gi\"}' \\\n",
     "  --resources-limits '{\"nvidia.com/gpu\": 1}' \\\n",
-    "  --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2 \\\n",
-    "  --instance-type ml.g5.8xlarge \\\n",
-    "  --dimensions '{\"EndpointName\": \"endpoint-s3-test-cli\", \"VariantName\": \"AllTraffic\"}' \\\n",
+    "  --tls-certificate-output-s3-uri s3://sample-bucket \\\n",
+    "  --instance-type <instance-type> \\\n",
+    "  --dimensions '{\"EndpointName\": \"endpoint-s3\", \"VariantName\": \"AllTraffic\"}' \\\n",
     "  --metrics-enabled true \\\n",
-    "  --endpoint-name endpoint-s3-test-cli \\\n",
-    "  --model-name deepseek15b-s3-test-cli"
+    "  --endpoint-name endpoint-s3 \\\n",
+    "  --model-name <model-name>"
    ]
   },
   {
@@ -98,7 +91,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp describe hyp-custom-endpoint --name endpoint-s3-test-cli"
+    "!hyp describe hyp-custom-endpoint --name endpoint-s3"
    ]
   },
   {
@@ -108,7 +101,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-s3-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'"
+    "!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-s3 --body '{\"inputs\":\"What is the capital of USA?\"}'"
    ]
   },
   {
@@ -118,7 +111,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!hyp delete hyp-custom-endpoint --name endpoint-s3-test-cli"
+    "!hyp delete hyp-custom-endpoint --name endpoint-s3"
    ]
   },
   {

Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,10 @@`
`1`	`1`	`{`
`2`	`2`	`"cells": [`
`3`	`3`	`{`
`4`		`- "metadata": {},`
`5`	`4`	`"cell_type": "markdown",`
`6`		`- "source": "",`
`7`		`- "id": "f28ecfc84cef3505"`
	`5`	`+ "id": "f28ecfc84cef3505",`
	`6`	`+ "metadata": {},`
	`7`	`+ "source": []`
`8`	`8`	`},`
`9`	`9`	`{`
`10`	`10`	`"cell_type": "markdown",`
`@@ -41,7 +41,7 @@`
`41`	`41`	`"metadata": {},`
`42`	`42`	`"outputs": [],`
`43`	`43`	`"source": [`
`44`		`- "!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"`
	`44`	`+ "!hyp set-cluster-context --cluster-name <cluster-name>"`
`45`	`45`	`]`
`46`	`46`	`},`
`47`	`47`	`{`
`@@ -53,11 +53,9 @@`
`53`	`53`	`"source": [`
`54`	`54`	`"!hyp create hyp-jumpstart-endpoint \\\n",`
`55`	`55`	`" --version 1.0 \\\n",`
`56`		`- " --model-id deepseek-llm-r1-distill-qwen-1-5b \\\n",`
`57`		`- " --model-version 2.0.4 \\\n",`
`58`		`- " --instance-type ml.g5.8xlarge \\\n",`
`59`		`- " --endpoint-name endpoint-js-test-cli \\\n",`
`60`		`- " --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2"`
	`56`	`+ " --model-id <model-id> \\\n",`
	`57`	`+ " --instance-type <instance-type> \\\n",`
	`58`	`+ " --endpoint-name endpoint-js"`
`61`	`59`	`]`
`62`	`60`	`},`
`63`	`61`	`{`
`@@ -77,7 +75,7 @@`
`77`	`75`	`"metadata": {},`
`78`	`76`	`"outputs": [],`
`79`	`77`	`"source": [`
`80`		`- "!hyp describe hyp-jumpstart-endpoint --name endpoint-js-test-cli"`
	`78`	`+ "!hyp describe hyp-jumpstart-endpoint --name endpoint-js"`
`81`	`79`	`]`
`82`	`80`	`},`
`83`	`81`	`{`
`@@ -87,7 +85,7 @@`
`87`	`85`	`"metadata": {},`
`88`	`86`	`"outputs": [],`
`89`	`87`	`"source": [`
`90`		`- "!hyp invoke hyp-jumpstart-endpoint --endpoint-name endpoint-js-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'"`
	`88`	`+ "!hyp invoke hyp-jumpstart-endpoint --endpoint-name endpoint-js --body '{\"inputs\":\"What is the capital of USA?\"}'"`
`91`	`89`	`]`
`92`	`90`	`},`
`93`	`91`	`{`
`@@ -97,7 +95,7 @@`
`97`	`95`	`"metadata": {},`
`98`	`96`	`"outputs": [],`
`99`	`97`	`"source": [`
`100`		`- "!hyp delete hyp-jumpstart-endpoint --name endpoint-js-test-cli"`
	`98`	`+ "!hyp delete hyp-jumpstart-endpoint --name endpoint-js"`
`101`	`99`	`]`
`102`	`100`	`},`
`103`	`101`	`{`