@@ -2981,6 +2981,9 @@ def create_endpoint_config(
29812981 tags = None ,
29822982 kms_key = None ,
29832983 data_capture_config_dict = None ,
2984+ volume_size = None ,
2985+ model_data_download_timeout = None ,
2986+ container_startup_health_check_timeout = None ,
29842987 ):
29852988 """Create an Amazon SageMaker endpoint configuration.
29862989
@@ -3004,6 +3007,16 @@ def create_endpoint_config(
30043007 attached to the instance hosting the endpoint.
30053008 data_capture_config_dict (dict): Specifies configuration related to Endpoint data
30063009 capture for use with Amazon SageMaker Model Monitoring. Default: None.
3010+ volume_size (int): The size, in GB, of the ML storage volume attached to individual
3011+ inference instance associated with the production variant. Currently only Amazon EBS
3012+ gp2 storage volumes are supported.
3013+ model_data_download_timeout (int): The timeout value, in seconds, to download and
3014+ extract model data from Amazon S3 to the individual inference instance associated
3015+ with this production variant.
3016+ container_startup_health_check_timeout (int): The timeout value, in seconds, for your
3017+ inference container to pass health check by SageMaker Hosting. For more information
3018+ about health check see:
3019+ https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
30073020
30083021 Example:
30093022 >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -3025,6 +3038,9 @@ def create_endpoint_config(
30253038 instance_type ,
30263039 initial_instance_count ,
30273040 accelerator_type = accelerator_type ,
3041+ volume_size = volume_size ,
3042+ model_data_download_timeout = model_data_download_timeout ,
3043+ container_startup_health_check_timeout = container_startup_health_check_timeout ,
30283044 )
30293045 ],
30303046 }
@@ -4636,6 +4652,9 @@ def production_variant(
46364652 initial_weight = 1 ,
46374653 accelerator_type = None ,
46384654 serverless_inference_config = None ,
4655+ volume_size = None ,
4656+ model_data_download_timeout = None ,
4657+ container_startup_health_check_timeout = None ,
46394658):
46404659 """Create a production variant description suitable for use in a ``ProductionVariant`` list.
46414660
@@ -4657,7 +4676,16 @@ def production_variant(
46574676 serverless_inference_config (dict): Specifies configuration dict related to serverless
46584677 endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
46594678 object (default: None)
4660-
4679+ volume_size (int): The size, in GB, of the ML storage volume attached to individual
4680+ inference instance associated with the production variant. Currently only Amazon EBS
4681+ gp2 storage volumes are supported.
4682+ model_data_download_timeout (int): The timeout value, in seconds, to download and extract
4683+ model data from Amazon S3 to the individual inference instance associated with this
4684+ production variant.
4685+ container_startup_health_check_timeout (int): The timeout value, in seconds, for your
4686+ inference container to pass health check by SageMaker Hosting. For more information
4687+ about health check see:
4688+ https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
46614689 Returns:
46624690 dict[str, str]: A SageMaker ``ProductionVariant`` description
46634691 """
@@ -4676,6 +4704,12 @@ def production_variant(
46764704 initial_instance_count = initial_instance_count or 1
46774705 production_variant_configuration ["InitialInstanceCount" ] = initial_instance_count
46784706 production_variant_configuration ["InstanceType" ] = instance_type
4707+ update_args (
4708+ production_variant_configuration ,
4709+ VolumeSizeInGB = volume_size ,
4710+ ModelDataDownloadTimeoutInSeconds = model_data_download_timeout ,
4711+ ContainerStartupHealthCheckTimeoutInSeconds = container_startup_health_check_timeout ,
4712+ )
46794713
46804714 return production_variant_configuration
46814715
0 commit comments