Skip to content

Commit ae9301c

Browse files
committed
Update example notebooks for inference CLI
1 parent 29a16c5 commit ae9301c

File tree

3 files changed

+37
-51
lines changed

3 files changed

+37
-51
lines changed

examples/inference/CLI/inference-fsx-model-e2e-cli.ipynb

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"metadata": {},
3636
"outputs": [],
3737
"source": [
38-
"!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"
38+
"!hyp set-cluster-context --cluster-name <cluster-name>"
3939
]
4040
},
4141
{
@@ -47,24 +47,19 @@
4747
"source": [
4848
"!hyp create hyp-custom-endpoint \\\n",
4949
" --version 1.0 \\\n",
50-
" --env \\\n",
51-
" '{\"HF_MODEL_ID\":\"/opt/ml/model\", \\\n",
52-
" \"SAGEMAKER_PROGRAM\":\"inference.py\", \\\n",
53-
" \"SAGEMAKER_SUBMIT_DIRECTORY\":\"/opt/ml/model/code\", \\\n",
54-
" \"MODEL_CACHE_ROOT\":\"/opt/ml/model\", \\\n",
55-
" \"SAGEMAKER_ENV\":\"1\"}' \\\n",
50+
" --env '{ \"key1\": \"val1\", \"key2\": \"val2\"}' \\\n",
5651
" --model-source-type fsx \\\n",
57-
" --model-location deepseek-1-5b \\\n",
58-
" --fsx-file-system-id fs-0e6a92495c35a81f2 \\\n",
59-
" --image-uri 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0 \\\n",
52+
" --model-location <model-location-folder> \\\n",
53+
" --fsx-file-system-id <fsx-file-system-id> \\\n",
54+
" --image-uri <image-uri> \\\n",
6055
" --model-volume-mount-name model-weights \\\n",
6156
" --container-port 8080 \\\n",
6257
" --resources-requests '{\"cpu\": \"4\", \"nvidia.com/gpu\": 1, \"memory\": \"32Gi\"}' \\\n",
6358
" --resources-limits '{\"nvidia.com/gpu\": 1}' \\\n",
64-
" --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2 \\\n",
65-
" --instance-type ml.g5.8xlarge \\\n",
66-
" --endpoint-name endpoint-fsx-test-cli \\\n",
67-
" --model-name deepseek15b-fsx-test-cli"
59+
" --tls-certificate-output-s3-uri s3://sample-bucket \\\n",
60+
" --instance-type <instance-type> \\\n",
61+
" --endpoint-name endpoint-fsx \\\n",
62+
" --model-name <model-name>"
6863
]
6964
},
7065
{
@@ -84,7 +79,7 @@
8479
"metadata": {},
8580
"outputs": [],
8681
"source": [
87-
"!hyp describe hyp-custom-endpoint --name endpoint-fsx-test-cli"
82+
"!hyp describe hyp-custom-endpoint --name endpoint-fsx"
8883
]
8984
},
9085
{
@@ -94,7 +89,7 @@
9489
"metadata": {},
9590
"outputs": [],
9691
"source": [
97-
"!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-fsx-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'"
92+
"!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-fsx --body '{\"inputs\":\"What is the capital of USA?\"}'"
9893
]
9994
},
10095
{
@@ -104,7 +99,7 @@
10499
"metadata": {},
105100
"outputs": [],
106101
"source": [
107-
"!hyp delete hyp-custom-endpoint --name endpoint-fsx-test-cli"
102+
"!hyp delete hyp-custom-endpoint --name endpoint-fsx"
108103
]
109104
},
110105
{

examples/inference/CLI/inference-jumpstart-e2e-cli.ipynb

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"cells": [
33
{
4-
"metadata": {},
54
"cell_type": "markdown",
6-
"source": "",
7-
"id": "f28ecfc84cef3505"
5+
"id": "f28ecfc84cef3505",
6+
"metadata": {},
7+
"source": []
88
},
99
{
1010
"cell_type": "markdown",
@@ -41,7 +41,7 @@
4141
"metadata": {},
4242
"outputs": [],
4343
"source": [
44-
"!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"
44+
"!hyp set-cluster-context --cluster-name <cluster-name>"
4545
]
4646
},
4747
{
@@ -53,11 +53,9 @@
5353
"source": [
5454
"!hyp create hyp-jumpstart-endpoint \\\n",
5555
" --version 1.0 \\\n",
56-
" --model-id deepseek-llm-r1-distill-qwen-1-5b \\\n",
57-
" --model-version 2.0.4 \\\n",
58-
" --instance-type ml.g5.8xlarge \\\n",
59-
" --endpoint-name endpoint-js-test-cli \\\n",
60-
" --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2"
56+
" --model-id <model-id> \\\n",
57+
" --instance-type <instance-type> \\\n",
58+
"  --endpoint-name endpoint-js"
6159
]
6260
},
6361
{
@@ -77,7 +75,7 @@
7775
"metadata": {},
7876
"outputs": [],
7977
"source": [
80-
"!hyp describe hyp-jumpstart-endpoint --name endpoint-js-test-cli"
78+
"!hyp describe hyp-jumpstart-endpoint --name endpoint-js"
8179
]
8280
},
8381
{
@@ -87,7 +85,7 @@
8785
"metadata": {},
8886
"outputs": [],
8987
"source": [
90-
"!hyp invoke hyp-jumpstart-endpoint --endpoint-name endpoint-js-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'"
88+
"!hyp invoke hyp-jumpstart-endpoint --endpoint-name endpoint-js --body '{\"inputs\":\"What is the capital of USA?\"}'"
9189
]
9290
},
9391
{
@@ -97,7 +95,7 @@
9795
"metadata": {},
9896
"outputs": [],
9997
"source": [
100-
"!hyp delete hyp-jumpstart-endpoint --name endpoint-js-test-cli"
98+
"!hyp delete hyp-jumpstart-endpoint --name endpoint-js"
10199
]
102100
},
103101
{

examples/inference/CLI/inference-s3-model-e2e-cli.ipynb

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"metadata": {},
3636
"outputs": [],
3737
"source": [
38-
"!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1"
38+
"!hyp set-cluster-context --cluster-name <cluster-name>"
3939
]
4040
},
4141
{
@@ -47,38 +47,31 @@
4747
"source": [
4848
"!hyp create hyp-custom-endpoint \\\n",
4949
" --version 1.0 \\\n",
50-
" --env \\\n",
51-
" '{ \\\n",
52-
" \"HF_MODEL_ID\": \"/opt/ml/model\", \\\n",
53-
" \"SAGEMAKER_PROGRAM\": \"inference.py\", \\\n",
54-
" \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code\", \\\n",
55-
" \"MODEL_CACHE_ROOT\": \"/opt/ml/model\", \\\n",
56-
" \"SAGEMAKER_ENV\": \"1\" \\\n",
57-
" }' \\\n",
50+
" --env '{ \"key1\": \"val1\", \"key2\": \"val2\"}' \\\n",
5851
" --metric-collection-period 30 \\\n",
5952
" --metric-name Invocations \\\n",
6053
" --metric-stat Sum \\\n",
6154
" --metric-type Average \\\n",
6255
" --min-value 0.0 \\\n",
63-
" --cloud-watch-trigger-name SageMaker-Invocations-new \\\n",
56+
" --cloud-watch-trigger-name SageMaker-Invocations \\\n",
6457
" --cloud-watch-trigger-namespace AWS/SageMaker \\\n",
6558
" --target-value 10 \\\n",
6659
" --use-cached-metrics true \\\n",
6760
" --model-source-type s3 \\\n",
68-
" --model-location deepseek15b \\\n",
69-
" --s3-bucket-name test-model-s3-zhaoqi \\\n",
70-
" --s3-region us-east-2 \\\n",
71-
" --image-uri 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0 \\\n",
61+
" --model-location <model-location-folder> \\\n",
62+
" --s3-bucket-name <bucket-name> \\\n",
63+
" --s3-region <bucket-region> \\\n",
64+
" --image-uri <image-uri> \\\n",
7265
" --model-volume-mount-name model-weights \\\n",
7366
" --container-port 8080 \\\n",
7467
" --resources-requests '{\"cpu\": \"30000m\", \"nvidia.com/gpu\": 1, \"memory\": \"100Gi\"}' \\\n",
7568
" --resources-limits '{\"nvidia.com/gpu\": 1}' \\\n",
76-
" --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2 \\\n",
77-
" --instance-type ml.g5.8xlarge \\\n",
78-
" --dimensions '{\"EndpointName\": \"endpoint-s3-test-cli\", \"VariantName\": \"AllTraffic\"}' \\\n",
69+
" --tls-certificate-output-s3-uri s3://sample-bucket \\\n",
70+
" --instance-type <instance-type> \\\n",
71+
" --dimensions '{\"EndpointName\": \"endpoint-s3\", \"VariantName\": \"AllTraffic\"}' \\\n",
7972
" --metrics-enabled true \\\n",
80-
" --endpoint-name endpoint-s3-test-cli \\\n",
81-
" --model-name deepseek15b-s3-test-cli"
73+
" --endpoint-name endpoint-s3 \\\n",
74+
" --model-name <model-name>"
8275
]
8376
},
8477
{
@@ -98,7 +91,7 @@
9891
"metadata": {},
9992
"outputs": [],
10093
"source": [
101-
"!hyp describe hyp-custom-endpoint --name endpoint-s3-test-cli"
94+
"!hyp describe hyp-custom-endpoint --name endpoint-s3"
10295
]
10396
},
10497
{
@@ -108,7 +101,7 @@
108101
"metadata": {},
109102
"outputs": [],
110103
"source": [
111-
"!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-s3-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'"
104+
"!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-s3 --body '{\"inputs\":\"What is the capital of USA?\"}'"
112105
]
113106
},
114107
{
@@ -118,7 +111,7 @@
118111
"metadata": {},
119112
"outputs": [],
120113
"source": [
121-
"!hyp delete hyp-custom-endpoint --name endpoint-s3-test-cli"
114+
"!hyp delete hyp-custom-endpoint --name endpoint-s3"
122115
]
123116
},
124117
{

0 commit comments

Comments
 (0)