Commit dee74e2

Update readme
1 parent 9a344e5 commit dee74e2

2 files changed: +50 −85 lines

README.md

Lines changed: 48 additions & 53 deletions
@@ -337,24 +337,21 @@ Pre-trained Jumpstart models can be gotten from https://sagemaker.readthedocs.io
 from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
 from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
 
-model = Model(
-    model_id="deepseek-llm-r1-distill-qwen-1-5b",
-    model_version="2.0.4"
+model=Model(
+    model_id='deepseek-llm-r1-distill-qwen-1-5b',
+    model_version='2.0.4',
 )
-
-server = Server(
-    instance_type="ml.g5.8xlarge"
+server=Server(
+    instance_type='ml.g5.8xlarge',
 )
+endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
+tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')
 
-endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
-
-js_endpoint = HPJumpStartEndpoint(
+js_endpoint=HPJumpStartEndpoint(
     model=model,
     server=server,
     sage_maker_endpoint=endpoint_name,
-    tls_config=tls_config
+    tls_config=tls_config,
 )
 
 js_endpoint.create()
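
The snippet above ends at `create()`; the README then invokes the endpoint (the `print(response)` context line in the next hunk). A minimal invocation sketch, assuming an `invoke(body=...)` method and a TGI-style JSON payload, neither of which is shown in this diff:

```python
# Hypothetical invocation of the JumpStart endpoint created above.
# The invoke() signature and payload shape are assumptions; verify
# against the SDK section elided from this diff.
data = '{"inputs": "What is the capital of USA?"}'
response = js_endpoint.invoke(body=data).body.read()
print(response)
```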
@@ -370,51 +367,51 @@ print(response)
 ```
 
 
-#### Creating a Custom Inference Endpoint
+#### Creating a Custom Inference Endpoint (with S3)
 
 ```
-from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
-from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
+from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, AutoScalingSpec, Metrics, S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker
+from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint
 
-model = Model(
-    model_source_type="s3",
-    model_location="test-pytorch-job/model.tar.gz",
-    s3_bucket_name="my-bucket",
-    s3_region="us-east-2",
-    prefetch_enabled=True
+model_source_config = ModelSourceConfig(
+    model_source_type='s3',
+    model_location="<my-model-folder-in-s3>",
+    s3_storage=S3Storage(
+        bucket_name='<my-model-artifacts-bucket>',
+        region='us-east-2',
+    ),
 )
 
-server = Server(
-    instance_type="ml.g5.8xlarge",
-    image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
-    container_port=8080,
-    model_volume_mount_name="model-weights"
-)
+environment_variables = [
+    EnvironmentVariables(name="HF_MODEL_ID", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_PROGRAM", value="inference.py"),
+    EnvironmentVariables(name="SAGEMAKER_SUBMIT_DIRECTORY", value="/opt/ml/model/code"),
+    EnvironmentVariables(name="MODEL_CACHE_ROOT", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_ENV", value="1"),
+]
 
-resources = {
-    "requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
-    "limits": {"nvidia.com/gpu": 1}
-}
-
-env = EnvironmentVariables(
-    HF_MODEL_ID="/opt/ml/model",
-    SAGEMAKER_PROGRAM="inference.py",
-    SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
-    MODEL_CACHE_ROOT="/opt/ml/model",
-    SAGEMAKER_ENV="1"
+worker = Worker(
+    image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
+    model_volume_mount=ModelVolumeMount(
+        name='model-weights',
+    ),
+    model_invocation_port=ModelInvocationPort(container_port=8080),
+    resources=Resources(
+        requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
+        limits={"nvidia.com/gpu": 1}
+    ),
+    environment_variables=environment_variables,
 )
 
-endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')
 
-custom_endpoint = HPCustomEndpoint(
-    model=model,
-    server=server,
-    resources=resources,
-    environment=env,
-    sage_maker_endpoint=endpoint_name,
+custom_endpoint = HPEndpoint(
+    endpoint_name='<my-endpoint-name>',
+    instance_type='ml.g5.8xlarge',
+    model_name='deepseek15b-test-model-name',
     tls_config=tls_config,
+    model_source_config=model_source_config,
+    worker=worker,
 )
 
 custom_endpoint.create()
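
As with the JumpStart flow, the README invokes the custom endpoint after creation (again visible only as the `print(response)` context line in the next hunk). A sketch under the same assumption about `invoke()`:

```python
import json

# Hypothetical invocation of the custom endpoint; the invoke() signature
# is the same assumption as in the JumpStart sketch above.
payload = '{"inputs": "What is the capital of USA?"}'
raw = custom_endpoint.invoke(body=payload).body.read()
print(json.loads(raw))
```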
@@ -431,19 +428,17 @@ print(response)
 #### Managing an Endpoint
 
 ```
-endpoint_iterator = HPJumpStartEndpoint.list()
-for endpoint in endpoint_iterator:
-    print(endpoint.name, endpoint.status)
+endpoint_list = HPEndpoint.list()
+print(endpoint_list[0])
 
-logs = js_endpoint.get_logs()
-print(logs)
+print(custom_endpoint.get_operator_logs(since_hours=0.5))
 
 ```
 
 #### Deleting an Endpoint
 
 ```
-js_endpoint.delete()
+custom_endpoint.delete()
 
 ```
 
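The list, log, and delete calls in the new README compose naturally into a cleanup pass. A sketch using only the methods shown above; whether the objects returned by `list()` expose `get_operator_logs` and `delete` as instance methods is an assumption:

```python
# Sketch: inspect, then tear down, every endpoint returned by list().
# Assumes HPEndpoint.list() yields objects with the methods used in the
# README's management example.
for endpoint in HPEndpoint.list():
    print(endpoint)  # summary line, as in print(endpoint_list[0]) above
    print(endpoint.get_operator_logs(since_hours=0.5))
    endpoint.delete()
```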
examples/inference/SDK/inference-s3-model-e2e.ipynb

Lines changed: 2 additions & 32 deletions
@@ -29,7 +29,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, PrometheusTrigger, AutoScalingSpec, ModelMetrics, Metrics, FsxStorage, S3Storage, ModelSourceConfig, Tags, TlsConfig, ConfigMapKeyRef, FieldRef, ResourceFieldRef, SecretKeyRef, ValueFrom, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Claims, Resources, Worker\n",
+    "from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, AutoScalingSpec, Metrics, S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker\n",
     "from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint\n",
     "import yaml\n",
     "import time"
@@ -72,35 +72,7 @@
     " limits={\"nvidia.com/gpu\": 1}\n",
     " ),\n",
     " environment_variables=environment_variables,\n",
-    ")\n",
-    "\n",
-    "# Create dimensions\n",
-    "dimensions = [\n",
-    " Dimensions(name=\"EndpointName\", value=\"<my-endpoint-name>\"),\n",
-    " Dimensions(name=\"VariantName\", value=\"AllTraffic\")\n",
-    "]\n",
-    "\n",
-    "# Create CloudWatch trigger\n",
-    "cloudwatch_trigger = CloudWatchTrigger(\n",
-    " dimensions=dimensions,\n",
-    " metric_collection_period=30,\n",
-    " metric_name=\"Invocations\",\n",
-    " metric_stat=\"Sum\",\n",
-    " metric_type=\"Average\",\n",
-    " min_value=0.0,\n",
-    " name=\"SageMaker-Invocations\",\n",
-    " namespace=\"AWS/SageMaker\",\n",
-    " target_value=10,\n",
-    " use_cached_metrics=False\n",
-    ")\n",
-    "\n",
-    "# Create autoscaling spec\n",
-    "auto_scaling_spec = AutoScalingSpec(\n",
-    " cloud_watch_trigger=cloudwatch_trigger\n",
-    ")\n",
-    "\n",
-    "# Create metrics\n",
-    "metrics = Metrics(enabled=True)"
+    ")"
 ]
 },
 {
@@ -117,8 +89,6 @@
     " tls_config=tls_config,\n",
     " model_source_config=model_source_config,\n",
     " worker=worker,\n",
-    " auto_scaling_spec=auto_scaling_spec,\n",
-    " metrics=metrics,\n",
     ")"
 ]
 },
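
The commit removes the autoscaling and metrics objects from the notebook example while leaving `CloudWatchTrigger`, `Dimensions`, `AutoScalingSpec`, and `Metrics` in the trimmed import. For reference, the deleted cell assembled them as follows; the field values are copied verbatim from the removed lines, and whether `HPEndpoint` still accepts `auto_scaling_spec` and `metrics` after this change is not established by the diff:

```python
# Reassembled from the lines this commit removes; kept only as a reference
# for re-enabling autoscaling, not part of the updated example.
dimensions = [
    Dimensions(name="EndpointName", value="<my-endpoint-name>"),
    Dimensions(name="VariantName", value="AllTraffic"),
]

cloudwatch_trigger = CloudWatchTrigger(
    dimensions=dimensions,
    metric_collection_period=30,
    metric_name="Invocations",
    metric_stat="Sum",
    metric_type="Average",
    min_value=0.0,
    name="SageMaker-Invocations",
    namespace="AWS/SageMaker",
    target_value=10,
    use_cached_metrics=False,
)

auto_scaling_spec = AutoScalingSpec(cloud_watch_trigger=cloudwatch_trigger)
metrics = Metrics(enabled=True)
```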
