@@ -337,24 +337,21 @@ Pre-trained JumpStart models can be obtained from https://sagemaker.readthedocs.io
 from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
 from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

-model = Model(
-    model_id="deepseek-llm-r1-distill-qwen-1-5b",
-    model_version="2.0.4"
+model = Model(
+    model_id='deepseek-llm-r1-distill-qwen-1-5b',
+    model_version='2.0.4',
 )
-
-server = Server(
-    instance_type="ml.g5.8xlarge"
+server = Server(
+    instance_type='ml.g5.8xlarge',
 )
+endpoint_name = SageMakerEndpoint(name='<my-endpoint-name>')
+tls_config = TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')

-endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
-
-js_endpoint = HPJumpStartEndpoint(
+js_endpoint = HPJumpStartEndpoint(
     model=model,
     server=server,
     sage_maker_endpoint=endpoint_name,
-    tls_config=tls_config
+    tls_config=tls_config,
 )

 js_endpoint.create()
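
To find valid `model_id` / `model_version` pairs for the snippet above, the upstream SageMaker Python SDK ships a listing helper; a minimal sketch, assuming `list_jumpstart_models` is available in the installed `sagemaker` version:

```
# list the JumpStart model IDs the SageMaker Python SDK knows about,
# e.g. the deepseek-llm-r1-distill-qwen-1-5b ID used above
from sagemaker.jumpstart.notebook_utils import list_jumpstart_models

print(list_jumpstart_models())
```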
@@ -370,51 +367,51 @@ print(response)
 ```


-#### Creating a Custom Inference Endpoint
+#### Creating a Custom Inference Endpoint (with S3)

 ```
-from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
-from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
+from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, AutoScalingSpec, Metrics, S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker
+from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint

-model = Model(
-    model_source_type="s3",
-    model_location="test-pytorch-job/model.tar.gz",
-    s3_bucket_name="my-bucket",
-    s3_region="us-east-2",
-    prefetch_enabled=True
+model_source_config = ModelSourceConfig(
+    model_source_type='s3',
+    model_location='<my-model-folder-in-s3>',
+    s3_storage=S3Storage(
+        bucket_name='<my-model-artifacts-bucket>',
+        region='us-east-2',
+    ),
 )

-server = Server(
-    instance_type="ml.g5.8xlarge",
-    image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
-    container_port=8080,
-    model_volume_mount_name="model-weights"
-)
+environment_variables = [
+    EnvironmentVariables(name="HF_MODEL_ID", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_PROGRAM", value="inference.py"),
+    EnvironmentVariables(name="SAGEMAKER_SUBMIT_DIRECTORY", value="/opt/ml/model/code"),
+    EnvironmentVariables(name="MODEL_CACHE_ROOT", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_ENV", value="1"),
+]

-resources = {
-    "requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
-    "limits": {"nvidia.com/gpu": 1}
-}
-
-env = EnvironmentVariables(
-    HF_MODEL_ID="/opt/ml/model",
-    SAGEMAKER_PROGRAM="inference.py",
-    SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
-    MODEL_CACHE_ROOT="/opt/ml/model",
-    SAGEMAKER_ENV="1"
+worker = Worker(
+    image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
+    model_volume_mount=ModelVolumeMount(
+        name='model-weights',
+    ),
+    model_invocation_port=ModelInvocationPort(container_port=8080),
+    resources=Resources(
+        requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
+        limits={"nvidia.com/gpu": 1}
+    ),
+    environment_variables=environment_variables,
 )

-endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+tls_config = TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')

-custom_endpoint = HPCustomEndpoint(
-    model=model,
-    server=server,
-    resources=resources,
-    environment=env,
-    sage_maker_endpoint=endpoint_name,
+custom_endpoint = HPEndpoint(
+    endpoint_name='<my-endpoint-name>',
+    instance_type='ml.g5.8xlarge',
+    model_name='deepseek15b-test-model-name',
     tls_config=tls_config,
+    model_source_config=model_source_config,
+    worker=worker,
 )

 custom_endpoint.create()
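
Once the endpoint is in service it can be invoked; a minimal sketch, assuming the endpoint object exposes `invoke(body=...)` and that the TGI container above expects a JSON payload with an `inputs` field (the prompt text is illustrative):

```
data = '{"inputs": "What is the capital of USA?"}'
# assumption: invoke() returns a response whose body is a readable stream
response = custom_endpoint.invoke(body=data).body.read()
print(response)
```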
@@ -431,19 +428,17 @@ print(response)
 #### Managing an Endpoint

 ```
-endpoint_iterator = HPJumpStartEndpoint.list()
-for endpoint in endpoint_iterator:
-    print(endpoint.name, endpoint.status)
+endpoint_list = HPEndpoint.list()
+print(endpoint_list[0])

-logs = js_endpoint.get_logs()
-print(logs)
+print(custom_endpoint.get_operator_logs(since_hours=0.5))

 ```
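
A single endpoint can also be looked up by name; a minimal sketch, assuming `HPEndpoint.get(name=...)` is available and that the returned object carries a `status` attribute, as the listed endpoints do:

```
# assumption: get() fetches one endpoint by its metadata name
endpoint = HPEndpoint.get(name='<my-endpoint-name>')
print(endpoint.status)
```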

 #### Deleting an Endpoint

 ```
-js_endpoint.delete()
+custom_endpoint.delete()

 ```
