Skip to content

Commit 7cb2322

Browse files
author
AWS
committed
Amazon SageMaker Service Update: Amazon SageMaker inference components now support rolling update deployments.
1 parent 33f7fbe commit 7cb2322

File tree

2 files changed

+84
-7
lines changed

2 files changed

+84
-7
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "Amazon SageMaker Service",
4+
"contributor": "",
5+
"description": "Amazon SageMaker inference components now support rolling update deployments."
6+
}

services/sagemaker/src/main/resources/codegen-resources/service-2.json

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6513,7 +6513,7 @@
65136513
"BatchDeleteClusterNodesErrorList":{
65146514
"type":"list",
65156515
"member":{"shape":"BatchDeleteClusterNodesError"},
6516-
"max":99,
6516+
"max":3000,
65176517
"min":1
65186518
},
65196519
"BatchDeleteClusterNodesRequest":{
@@ -6529,7 +6529,7 @@
65296529
},
65306530
"NodeIds":{
65316531
"shape":"ClusterNodeIds",
6532-
"documentation":"<p>A list of node IDs to be deleted from the specified cluster.</p> <note> <p>For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.</p> </note>"
6532+
"documentation":"<p>A list of node IDs to be deleted from the specified cluster.</p> <note> <ul> <li> <p>For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.</p> </li> <li> <p>If you need to delete more than 99 instances, contact <a href=\"http://aws.amazon.com/contact-us/\">Support</a> for assistance.</p> </li> </ul> </note>"
65336533
}
65346534
}
65356535
},
@@ -7986,7 +7986,7 @@
79867986
"ClusterNodeIds":{
79877987
"type":"list",
79887988
"member":{"shape":"ClusterNodeId"},
7989-
"max":99,
7989+
"max":3000,
79907990
"min":1
79917991
},
79927992
"ClusterNodeRecovery":{
@@ -15959,6 +15959,10 @@
1595915959
"InferenceComponentStatus":{
1596015960
"shape":"InferenceComponentStatus",
1596115961
"documentation":"<p>The status of the inference component.</p>"
15962+
},
15963+
"LastDeploymentConfig":{
15964+
"shape":"InferenceComponentDeploymentConfig",
15965+
"documentation":"<p>The deployment and rollback settings that you assigned to the inference component.</p>"
1596215966
}
1596315967
}
1596415968
},
@@ -19977,7 +19981,7 @@
1997719981
"documentation":"<p>The list of tags that are associated with the experiment. You can use the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API to search on the tags.</p>"
1997819982
}
1997919983
},
19980-
"documentation":"<p>The properties of an experiment as returned by the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API.</p>"
19984+
"documentation":"<p>The properties of an experiment as returned by the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API. For information about experiments, see the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateExperiment.html\">CreateExperiment</a> API.</p>"
1998119985
},
1998219986
"ExperimentArn":{
1998319987
"type":"string",
@@ -22669,6 +22673,31 @@
2266922673
"max":2048,
2267022674
"min":20
2267122675
},
22676+
"InferenceComponentCapacitySize":{
22677+
"type":"structure",
22678+
"required":[
22679+
"Type",
22680+
"Value"
22681+
],
22682+
"members":{
22683+
"Type":{
22684+
"shape":"InferenceComponentCapacitySizeType",
22685+
"documentation":"<p>Specifies the endpoint capacity type.</p> <dl> <dt>COPY_COUNT</dt> <dd> <p>The endpoint activates based on the number of inference component copies.</p> </dd> <dt>CAPACITY_PERCENT</dt> <dd> <p>The endpoint activates based on the specified percentage of capacity.</p> </dd> </dl>"
22686+
},
22687+
"Value":{
22688+
"shape":"CapacitySizeValue",
22689+
"documentation":"<p>Defines the capacity size, either as a number of inference component copies or a capacity percentage.</p>"
22690+
}
22691+
},
22692+
"documentation":"<p>Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following:</p> <ul> <li> <p>A count of inference component copies </p> </li> <li> <p>The overall percentage of your fleet </p> </li> </ul> <p>For a rollback strategy, if you don't specify the fields in this object, or if you set the <code>Value</code> parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet.</p>"
22693+
},
22694+
"InferenceComponentCapacitySizeType":{
22695+
"type":"string",
22696+
"enum":[
22697+
"COPY_COUNT",
22698+
"CAPACITY_PERCENT"
22699+
]
22700+
},
2267222701
"InferenceComponentComputeResourceRequirements":{
2267322702
"type":"structure",
2267422703
"required":["MinMemoryRequiredInMb"],
@@ -22729,6 +22758,18 @@
2272922758
"type":"integer",
2273022759
"min":0
2273122760
},
22761+
"InferenceComponentDeploymentConfig":{
22762+
"type":"structure",
22763+
"required":["RollingUpdatePolicy"],
22764+
"members":{
22765+
"RollingUpdatePolicy":{
22766+
"shape":"InferenceComponentRollingUpdatePolicy",
22767+
"documentation":"<p>Specifies a rolling deployment strategy for updating a SageMaker AI endpoint.</p>"
22768+
},
22769+
"AutoRollbackConfiguration":{"shape":"AutoRollbackConfig"}
22770+
},
22771+
"documentation":"<p>The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings.</p>"
22772+
},
2273222773
"InferenceComponentName":{
2273322774
"type":"string",
2273422775
"max":63,
@@ -22739,6 +22780,32 @@
2273922780
"max":63,
2274022781
"pattern":"[a-zA-Z0-9-]+"
2274122782
},
22783+
"InferenceComponentRollingUpdatePolicy":{
22784+
"type":"structure",
22785+
"required":[
22786+
"MaximumBatchSize",
22787+
"WaitIntervalInSeconds"
22788+
],
22789+
"members":{
22790+
"MaximumBatchSize":{
22791+
"shape":"InferenceComponentCapacitySize",
22792+
"documentation":"<p>The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% and 50% of the copy count of the inference component.</p>"
22793+
},
22794+
"WaitIntervalInSeconds":{
22795+
"shape":"WaitIntervalInSeconds",
22796+
"documentation":"<p>The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet.</p>"
22797+
},
22798+
"MaximumExecutionTimeoutInSeconds":{
22799+
"shape":"MaximumExecutionTimeoutInSeconds",
22800+
"documentation":"<p>The time limit for the total deployment. Exceeding this limit causes a timeout.</p>"
22801+
},
22802+
"RollbackMaximumBatchSize":{
22803+
"shape":"InferenceComponentCapacitySize",
22804+
"documentation":"<p>The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback.</p>"
22805+
}
22806+
},
22807+
"documentation":"<p>Specifies a rolling deployment strategy for updating a SageMaker AI inference component.</p>"
22808+
},
2274222809
"InferenceComponentRuntimeConfig":{
2274322810
"type":"structure",
2274422811
"required":["CopyCount"],
@@ -29780,7 +29847,7 @@
2978029847
"members":{
2978129848
"ModelPackageName":{
2978229849
"shape":"EntityName",
29783-
"documentation":"<p>The name of the model.</p>"
29850+
"documentation":"<p>The name of the model package. The name can be as follows:</p> <ul> <li> <p>For a versioned model, the name is automatically generated by SageMaker Model Registry and follows the format '<code>ModelPackageGroupName/ModelPackageVersion</code>'.</p> </li> <li> <p>For an unversioned model, you must provide the name.</p> </li> </ul>"
2978429851
},
2978529852
"ModelPackageGroupName":{
2978629853
"shape":"EntityName",
@@ -29897,7 +29964,7 @@
2989729964
"documentation":"<p>Indicates if you want to skip model validation.</p>"
2989829965
}
2989929966
},
29900-
"documentation":"<p>A versioned model that can be deployed for SageMaker inference.</p>"
29967+
"documentation":"<p>A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following:</p> <ul> <li> <p>Versioned model: A part of a model package group in Model Registry.</p> </li> <li> <p>Unversioned model: Not part of a model package group and used in Amazon Web Services Marketplace.</p> </li> </ul> <p>For more information, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModelPackage.html\"> <code>CreateModelPackage</code> </a>.</p>"
2990129968
},
2990229969
"ModelPackageArn":{
2990329970
"type":"string",
@@ -30011,7 +30078,7 @@
3001130078
"documentation":"<p>A list of the tags associated with the model group. For more information, see <a href=\"https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html\">Tagging Amazon Web Services resources</a> in the <i>Amazon Web Services General Reference Guide</i>.</p>"
3001230079
}
3001330080
},
30014-
"documentation":"<p>A group of versioned models in the model registry.</p>"
30081+
"documentation":"<p>A group of versioned models in the Model Registry.</p>"
3001530082
},
3001630083
"ModelPackageGroupArn":{
3001730084
"type":"string",
@@ -40889,6 +40956,10 @@
4088940956
"RuntimeConfig":{
4089040957
"shape":"InferenceComponentRuntimeConfig",
4089140958
"documentation":"<p>Runtime settings for a model that is deployed with an inference component.</p>"
40959+
},
40960+
"DeploymentConfig":{
40961+
"shape":"InferenceComponentDeploymentConfig",
40962+
"documentation":"<p>The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings.</p>"
4089240963
}
4089340964
}
4089440965
},

0 commit comments

Comments
 (0)