Skip to content

Commit 9a344e5

Browse files
committed
Update inference SDK examples
1 parent 8034a24 commit 9a344e5

File tree

3 files changed

+59
-46
lines changed

3 files changed

+59
-46
lines changed

examples/inference/SDK/inference-fsx-model-e2e.ipynb

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,19 @@
77
"metadata": {},
88
"outputs": [],
99
"source": [
10-
"from sagemaker.hyperpod.hyperpod_manager import HyperPodManager\n",
11-
"\n",
12-
"HyperPodManager.list_clusters(region='us-east-2')\n",
13-
"HyperPodManager.set_context('<hyperpod-cluster-name>', region='us-east-2')"
10+
"from sagemaker.hyperpod import list_clusters, set_cluster_context\n",
11+
"list_clusters(region='us-east-2')"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"id": "765ef3fd",
18+
"metadata": {},
19+
"outputs": [],
20+
"source": [
21+
"# choose the HP cluster\n",
22+
"set_cluster_context('<my-cluster>', region='us-east-2')"
1423
]
1524
},
1625
{
@@ -20,7 +29,7 @@
2029
"metadata": {},
2130
"outputs": [],
2231
"source": [
23-
"from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, PrometheusTrigger, AutoScalingSpec, ModelMetrics, Metrics, FsxStorage, ModelSourceConfig, Tags, TlsConfig, ConfigMapKeyRef, FieldRef, ResourceFieldRef, SecretKeyRef, ValueFrom, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Claims, Resources, Worker\n",
32+
"from sagemaker.hyperpod.inference.config.hp_endpoint_config import FsxStorage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker\n",
2433
"from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint\n",
2534
"import yaml\n",
2635
"import time"
@@ -33,13 +42,13 @@
3342
"metadata": {},
3443
"outputs": [],
3544
"source": [
36-
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<your-tls-bucket-name>')\n",
45+
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')\n",
3746
"\n",
3847
"model_source_config = ModelSourceConfig(\n",
3948
" model_source_type='fsx',\n",
40-
" model_location=\"<your-model-folder-in-fsx>\",\n",
49+
" model_location=\"<my-model-folder-in-fsx>\",\n",
4150
" fsx_storage=FsxStorage(\n",
42-
" file_system_id='<your-fs-id>'\n",
51+
" file_system_id='<my-fs-id>'\n",
4352
" ),\n",
4453
")\n",
4554
"\n",
@@ -73,7 +82,7 @@
7382
"outputs": [],
7483
"source": [
7584
"fsx_endpoint = HPEndpoint(\n",
76-
" endpoint_name='test-endpoint-name-fsx-pysdk',\n",
85+
" endpoint_name='<my-endpoint-name>',\n",
7786
" instance_type='ml.g5.8xlarge',\n",
7887
" model_name='deepseek15b-fsx-test-pysdk',\n",
7988
" tls_config=tls_config,\n",
@@ -165,7 +174,7 @@
165174
"metadata": {},
166175
"outputs": [],
167176
"source": [
168-
"endpoint = HPEndpoint.get(name='<your-endpoint-name>')"
177+
"endpoint = HPEndpoint.get(name='<my-endpoint-name>')"
169178
]
170179
},
171180
{

examples/inference/SDK/inference-jumpstart-e2e.ipynb

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,14 @@
88
"## Inference Operator PySDK E2E Experience (JumpStart model)"
99
]
1010
},
11-
{
12-
"cell_type": "markdown",
13-
"id": "1b3ce5c1-3c3d-4139-b7ae-042f360f3032",
14-
"metadata": {},
15-
"source": [
16-
"<b>Prerequisite:</b> Data scientists should list clusters and set cluster context"
17-
]
18-
},
1911
{
2012
"cell_type": "code",
2113
"execution_count": null,
2214
"id": "e22c86d6-0d3d-4c51-bef0-3f4c59ce111c",
2315
"metadata": {},
2416
"outputs": [],
2517
"source": [
26-
"from sagemaker.hyperpod.hyperpod_manager import HyperPodManager"
18+
"from sagemaker.hyperpod import list_clusters, set_cluster_context"
2719
]
2820
},
2921
{
@@ -33,8 +25,7 @@
3325
"metadata": {},
3426
"outputs": [],
3527
"source": [
36-
"#Set region \n",
37-
"region = \"us-west-2\""
28+
"list_clusters(region='us-east-2')"
3829
]
3930
},
4031
{
@@ -44,8 +35,8 @@
4435
"metadata": {},
4536
"outputs": [],
4637
"source": [
47-
"# choose the HP cluster user works on\n",
48-
"HyperPodManager.set_context('sagemaker-hyperpod-eks-cluster-demo-05-01', region=region)"
38+
"# choose the HP cluster\n",
39+
"set_cluster_context('<my-cluster>', region='us-east-2')"
4940
]
5041
},
5142
{
@@ -67,7 +58,7 @@
6758
"from jumpstart_public_hub_visualization_utils import get_all_public_hub_model_data\n",
6859
"\n",
6960
"# Load and display SageMaker public hub models\n",
70-
"get_all_public_hub_model_data(region=\"us-west-2\")"
61+
"get_all_public_hub_model_data(region=\"us-east-2\")"
7162
]
7263
},
7364
{
@@ -122,8 +113,8 @@
122113
"server=Server(\n",
123114
" instance_type='ml.g5.8xlarge',\n",
124115
")\n",
125-
"endpoint_name=SageMakerEndpoint(name='deepsek7bsme-testing-jumpstart-7-1')\n",
126-
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://tls-bucket-inf1-beta2')\n",
116+
"endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')\n",
117+
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')\n",
127118
"\n",
128119
"# create spec\n",
129120
"js_endpoint=HPJumpStartEndpoint(\n",
@@ -230,7 +221,7 @@
230221
"outputs": [],
231222
"source": [
232223
"# output is similar to kubectl describe jumpstartmodel\n",
233-
"endpoint = HPJumpStartEndpoint.get(name='deepseek-llm-r1-distill-qwen-1-5b')\n",
224+
"endpoint = HPJumpStartEndpoint.get(name='<my-endpoint-name>')\n",
234225
"print_yaml(endpoint)"
235226
]
236227
},
@@ -265,10 +256,7 @@
265256
"outputs": [],
266257
"source": [
267258
"# get operator logs\n",
268-
"print(js_endpoint.get_operator_logs(since_hours=1))\n",
269-
"\n",
270-
"# get specific pod log\n",
271-
"# js_endpoint.get_logs(pod='pod-name')"
259+
"print(js_endpoint.get_operator_logs(since_hours=0.1))"
272260
]
273261
},
274262
{

examples/inference/SDK/inference-s3-model-e2e.ipynb

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,19 @@
77
"metadata": {},
88
"outputs": [],
99
"source": [
10-
"from sagemaker.hyperpod.hyperpod_manager import HyperPodManager\n",
11-
"\n",
12-
"HyperPodManager.list_clusters(region='us-east-2')\n",
13-
"HyperPodManager.set_context('<hyperpod-cluster-name>', region='us-east-2')"
10+
"from sagemaker.hyperpod import list_clusters, set_cluster_context\n",
11+
"list_clusters(region='us-east-2')"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"id": "14cd61ab",
18+
"metadata": {},
19+
"outputs": [],
20+
"source": [
21+
"# choose the HP cluster\n",
22+
"set_cluster_context('<my-cluster>', region='us-east-2')"
1423
]
1524
},
1625
{
@@ -33,13 +42,13 @@
3342
"metadata": {},
3443
"outputs": [],
3544
"source": [
36-
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<your-tls-bucket-name>')\n",
45+
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')\n",
3746
"\n",
3847
"model_source_config = ModelSourceConfig(\n",
3948
" model_source_type='s3',\n",
40-
" model_location=\"<your-model-folder-in-s3>\",\n",
49+
" model_location=\"<my-model-folder-in-s3>\",\n",
4150
" s3_storage=S3Storage(\n",
42-
" bucket_name='<your-model-artifacts-bucket>',\n",
51+
" bucket_name='<my-model-artifacts-bucket>',\n",
4352
" region='us-east-2',\n",
4453
" ),\n",
4554
")\n",
@@ -67,7 +76,7 @@
6776
"\n",
6877
"# Create dimensions\n",
6978
"dimensions = [\n",
70-
" Dimensions(name=\"EndpointName\", value=\"<your-endpoint-name>\"),\n",
79+
" Dimensions(name=\"EndpointName\", value=\"<my-endpoint-name>\"),\n",
7180
" Dimensions(name=\"VariantName\", value=\"AllTraffic\")\n",
7281
"]\n",
7382
"\n",
@@ -102,7 +111,7 @@
102111
"outputs": [],
103112
"source": [
104113
"s3_endpoint = HPEndpoint(\n",
105-
" endpoint_name='s3-test-endpoint-name',\n",
114+
" endpoint_name='<my-endpoint-name>',\n",
106115
" instance_type='ml.g5.8xlarge',\n",
107116
" model_name='deepseek15b-test-model-name', \n",
108117
" tls_config=tls_config,\n",
@@ -120,7 +129,7 @@
120129
"metadata": {},
121130
"outputs": [],
122131
"source": [
123-
"s3_endpoint.create(debug=True)"
132+
"s3_endpoint.create()"
124133
]
125134
},
126135
{
@@ -193,7 +202,17 @@
193202
"outputs": [],
194203
"source": [
195204
"endpoint_list = HPEndpoint.list()\n",
196-
"print_yaml(endpoint_list[1])"
205+
"print_yaml(endpoint_list[0])"
206+
]
207+
},
208+
{
209+
"cell_type": "code",
210+
"execution_count": null,
211+
"id": "660e8d47",
212+
"metadata": {},
213+
"outputs": [],
214+
"source": [
215+
"s3_endpoint = HPEndpoint.get(name='<my-endpoint-name>')"
197216
]
198217
},
199218
{
@@ -206,10 +225,7 @@
206225
"outputs": [],
207226
"source": [
208227
"# get operator logs\n",
209-
"print(s3_endpoint.get_operator_logs(since_hours=0.5))\n",
210-
"\n",
211-
"# get specific pod log\n",
212-
"# js_endpoint.get_logs(pod='pod-name')"
228+
"print(s3_endpoint.get_operator_logs(since_hours=0.1))"
213229
]
214230
},
215231
{

0 commit comments

Comments
 (0)