
Commit d0189e3

This release adds a new Neuron driver option to the InferenceAmiVersion parameter for ProductionVariant. It also adds support for fetching model lifecycle status in the ListModelPackages API, so users can now view the lifecycle stage of models that have been shared with them.
1 parent 84be17d commit d0189e3
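
For orientation, here is a minimal sketch of consuming the new ListModelPackages lifecycle field from this SDK. The Stage/StageStatus member names are assumed to mirror the existing ModelLifeCycle shape; they are not defined in this diff, and the filter value is illustrative only.

    using System;
    using System.Threading.Tasks;
    using Amazon.SageMaker;
    using Amazon.SageMaker.Model;

    class ListSharedModelLifecycles
    {
        static async Task Main()
        {
            var client = new AmazonSageMakerClient();

            // List versioned model packages; the filter here is illustrative only.
            var response = await client.ListModelPackagesAsync(new ListModelPackagesRequest
            {
                ModelPackageType = "Versioned"
            });

            foreach (ModelPackageSummary summary in response.ModelPackageSummaryList)
            {
                // ModelLifeCycle is the field this commit adds to ModelPackageSummary.
                // Stage/StageStatus are assumed from the existing ModelLifeCycle shape.
                var lifeCycle = summary.ModelLifeCycle;
                Console.WriteLine(lifeCycle != null
                    ? $"{summary.ModelPackageArn}: stage={lifeCycle.Stage}, status={lifeCycle.StageStatus}"
                    : $"{summary.ModelPackageArn}: no lifecycle information");
            }
        }
    }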

File tree: 8 files changed (+70, −8 lines)

generator/ServiceModels/sagemaker/sagemaker-2017-07-24.api.json

Lines changed: 4 additions & 2 deletions
@@ -18596,7 +18596,8 @@
       "ModelPackageDescription":{"shape":"EntityDescription"},
       "CreationTime":{"shape":"CreationTime"},
       "ModelPackageStatus":{"shape":"ModelPackageStatus"},
-      "ModelApprovalStatus":{"shape":"ModelApprovalStatus"}
+      "ModelApprovalStatus":{"shape":"ModelApprovalStatus"},
+      "ModelLifeCycle":{"shape":"ModelLifeCycle"}
     }
   },
   "ModelPackageSummaryList":{
@@ -20803,7 +20804,8 @@
     "enum":[
       "al2-ami-sagemaker-inference-gpu-2",
       "al2-ami-sagemaker-inference-gpu-2-1",
-      "al2-ami-sagemaker-inference-gpu-3-1"
+      "al2-ami-sagemaker-inference-gpu-3-1",
+      "al2-ami-sagemaker-inference-neuron-2"
     ]
   },
   "ProductionVariantInstanceType":{

generator/ServiceModels/sagemaker/sagemaker-2017-07-24.docs.json

Lines changed: 3 additions & 2 deletions
@@ -10095,7 +10095,7 @@
     "MaxPendingTimeInSeconds": {
       "base": "Maximum job scheduler pending time in seconds.",
       "refs": {
-        "StoppingCondition$MaxPendingTimeInSeconds": "<p>The maximum length of time, in seconds, that a training or compilation job can be pending before it is stopped.</p>"
+        "StoppingCondition$MaxPendingTimeInSeconds": "<p>The maximum length of time, in seconds, that a training or compilation job can be pending before it is stopped.</p> <note> <p>When working with training jobs that use capacity from <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/reserve-capacity-with-training-plans.html\">training plans</a>, not all <code>Pending</code> job states count against the <code>MaxPendingTimeInSeconds</code> limit. The following scenarios do not increment the <code>MaxPendingTimeInSeconds</code> counter:</p> <ul> <li> <p>The plan is in a <code>Scheduled</code> state: Jobs queued (in <code>Pending</code> status) before a plan's start date (waiting for scheduled start time)</p> </li> <li> <p>Between capacity reservations: Jobs temporarily back to <code>Pending</code> status between two capacity reservation periods</p> </li> </ul> <p> <code>MaxPendingTimeInSeconds</code> only increments when jobs are actively waiting for capacity in an <code>Active</code> plan.</p> </note>"
       }
     },
     "MaxPercentageOfInputDatasetLabeled": {
@@ -10781,6 +10781,7 @@
         "CreateModelPackageInput$ModelLifeCycle": "<p> A structure describing the current state of the model in its life cycle. </p>",
         "DescribeModelPackageOutput$ModelLifeCycle": "<p> A structure describing the current state of the model in its life cycle. </p>",
         "ModelPackage$ModelLifeCycle": "<p> A structure describing the current state of the model in its life cycle. </p>",
+        "ModelPackageSummary$ModelLifeCycle": null,
         "UpdateModelPackageInput$ModelLifeCycle": "<p> A structure describing the current state of the model in its life cycle. </p>"
       }
     },
@@ -13286,7 +13287,7 @@
     "ProductionVariantInferenceAmiVersion": {
       "base": null,
       "refs": {
-        "ProductionVariant$InferenceAmiVersion": "<p>Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.</p> <p>By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.</p> <p>The AMI version names, and their configurations, are the following:</p> <dl> <dt>al2-ami-sagemaker-inference-gpu-2</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-2-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-3-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 550</p> </li> <li> <p>CUDA version: 12.4</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> </dl>"
+        "ProductionVariant$InferenceAmiVersion": "<p>Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.</p> <p>By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.</p> <p>The AMI version names, and their configurations, are the following:</p> <dl> <dt>al2-ami-sagemaker-inference-gpu-2</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-2-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-3-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 550</p> </li> <li> <p>CUDA version: 12.4</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-neuron-2</dt> <dd> <ul> <li> <p>Accelerator: Inferentia2 and Trainium</p> </li> <li> <p>Neuron driver version: 2.19</p> </li> </ul> </dd> </dl>"
       }
     },
     "ProductionVariantInstanceType": {

generator/ServiceModels/sagemaker/sagemaker-2017-07-24.normal.json

Lines changed: 6 additions & 4 deletions
@@ -30368,7 +30368,8 @@
       "ModelApprovalStatus":{
         "shape":"ModelApprovalStatus",
         "documentation":"<p>The approval status of the model. This can be one of the following values.</p> <ul> <li> <p> <code>APPROVED</code> - The model is approved</p> </li> <li> <p> <code>REJECTED</code> - The model is rejected.</p> </li> <li> <p> <code>PENDING_MANUAL_APPROVAL</code> - The model is waiting for manual approval.</p> </li> </ul>"
-      }
+      },
+      "ModelLifeCycle":{"shape":"ModelLifeCycle"}
     },
     "documentation":"<p>Provides summary information about a model package.</p>"
   },
@@ -33816,7 +33817,7 @@
     },
     "InferenceAmiVersion":{
       "shape":"ProductionVariantInferenceAmiVersion",
-      "documentation":"<p>Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.</p> <p>By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.</p> <p>The AMI version names, and their configurations, are the following:</p> <dl> <dt>al2-ami-sagemaker-inference-gpu-2</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-2-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-3-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 550</p> </li> <li> <p>CUDA version: 12.4</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> </dl>"
+      "documentation":"<p>Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.</p> <p>By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.</p> <p>The AMI version names, and their configurations, are the following:</p> <dl> <dt>al2-ami-sagemaker-inference-gpu-2</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-2-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-3-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 550</p> </li> <li> <p>CUDA version: 12.4</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-neuron-2</dt> <dd> <ul> <li> <p>Accelerator: Inferentia2 and Trainium</p> </li> <li> <p>Neuron driver version: 2.19</p> </li> </ul> </dd> </dl>"
     }
   },
   "documentation":"<p> Identifies a model that you want to host and the resources chosen to deploy for hosting it. If you are deploying multiple models, tell SageMaker how to distribute traffic among the models by specifying variant weights. For more information on production variants, check <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/model-ab-testing.html\"> Production variants</a>. </p>"
@@ -33857,7 +33858,8 @@
     "enum":[
       "al2-ami-sagemaker-inference-gpu-2",
       "al2-ami-sagemaker-inference-gpu-2-1",
-      "al2-ami-sagemaker-inference-gpu-3-1"
+      "al2-ami-sagemaker-inference-gpu-3-1",
+      "al2-ami-sagemaker-inference-neuron-2"
     ]
   },
   "ProductionVariantInstanceType":{
@@ -37744,7 +37746,7 @@
     },
     "MaxPendingTimeInSeconds":{
       "shape":"MaxPendingTimeInSeconds",
-      "documentation":"<p>The maximum length of time, in seconds, that a training or compilation job can be pending before it is stopped.</p>"
+      "documentation":"<p>The maximum length of time, in seconds, that a training or compilation job can be pending before it is stopped.</p> <note> <p>When working with training jobs that use capacity from <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/reserve-capacity-with-training-plans.html\">training plans</a>, not all <code>Pending</code> job states count against the <code>MaxPendingTimeInSeconds</code> limit. The following scenarios do not increment the <code>MaxPendingTimeInSeconds</code> counter:</p> <ul> <li> <p>The plan is in a <code>Scheduled</code> state: Jobs queued (in <code>Pending</code> status) before a plan's start date (waiting for scheduled start time)</p> </li> <li> <p>Between capacity reservations: Jobs temporarily back to <code>Pending</code> status between two capacity reservation periods</p> </li> </ul> <p> <code>MaxPendingTimeInSeconds</code> only increments when jobs are actively waiting for capacity in an <code>Active</code> plan.</p> </note>"
     }
   },
   "documentation":"<p>Specifies a limit to how long a job can run. When the job reaches the time limit, SageMaker ends the job. Use this API to cap costs.</p> <p>To stop a training job, SageMaker sends the algorithm the <code>SIGTERM</code> signal, which delays job termination for 120 seconds. Algorithms can use this 120-second window to save the model artifacts, so the results of training are not lost. </p> <p>The training algorithms provided by SageMaker automatically save the intermediate results of a model training job when possible. This attempt to save artifacts is only a best effort case as model might not be in a state from which it can be saved. For example, if training has just started, the model might not be ready to save. When saved, this intermediate data is a valid model artifact. You can use it to create a model with <code>CreateModel</code>.</p> <note> <p>The Neural Topic Model (NTM) currently does not support saving intermediate model artifacts. When training NTMs, make sure that the maximum runtime is sufficient for the training job to complete.</p> </note>"

sdk/src/Services/SageMaker/Generated/Model/Internal/MarshallTransformations/ModelPackageSummaryUnmarshaller.cs

Lines changed: 6 additions & 0 deletions
@@ -78,6 +78,12 @@ public ModelPackageSummary Unmarshall(JsonUnmarshallerContext context)
                     unmarshalledObject.ModelApprovalStatus = unmarshaller.Unmarshall(context);
                     continue;
                 }
+                if (context.TestExpression("ModelLifeCycle", targetDepth))
+                {
+                    var unmarshaller = ModelLifeCycleUnmarshaller.Instance;
+                    unmarshalledObject.ModelLifeCycle = unmarshaller.Unmarshall(context);
+                    continue;
+                }
                 if (context.TestExpression("ModelPackageArn", targetDepth))
                 {
                     var unmarshaller = StringUnmarshaller.Instance;

sdk/src/Services/SageMaker/Generated/Model/ModelPackageSummary.cs

Lines changed: 16 additions & 0 deletions
@@ -36,6 +36,7 @@ public partial class ModelPackageSummary
     {
         private DateTime? _creationTime;
         private ModelApprovalStatus _modelApprovalStatus;
+        private ModelLifeCycle _modelLifeCycle;
         private string _modelPackageArn;
         private string _modelPackageDescription;
         private string _modelPackageGroupName;
@@ -93,6 +94,21 @@ internal bool IsSetModelApprovalStatus()
             return this._modelApprovalStatus != null;
         }

+        /// <summary>
+        /// Gets and sets the property ModelLifeCycle.
+        /// </summary>
+        public ModelLifeCycle ModelLifeCycle
+        {
+            get { return this._modelLifeCycle; }
+            set { this._modelLifeCycle = value; }
+        }
+
+        // Check to see if ModelLifeCycle property is set
+        internal bool IsSetModelLifeCycle()
+        {
+            return this._modelLifeCycle != null;
+        }
+
         /// <summary>
         /// Gets and sets the property ModelPackageArn.
         /// <para>

sdk/src/Services/SageMaker/Generated/Model/ProductionVariant.cs

Lines changed: 8 additions & 0 deletions
@@ -201,6 +201,14 @@ internal bool IsSetEnableSSMAccess()
         /// <para>
         /// NVIDIA Container Toolkit with disabled CUDA-compat mounting
         /// </para>
+        /// </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-neuron-2</dt> <dd> <ul> <li>
+        /// <para>
+        /// Accelerator: Inferentia2 and Trainium
+        /// </para>
+        /// </li> <li>
+        /// <para>
+        /// Neuron driver version: 2.19
+        /// </para>
         /// </li> </ul> </dd> </dl>
         /// </summary>
         public ProductionVariantInferenceAmiVersion InferenceAmiVersion
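
A hedged usage sketch for the new AMI option on this property: the endpoint-config name, model name, and inf2 instance type below are illustrative assumptions, not values from this commit.

    using System.Collections.Generic;
    using System.Threading.Tasks;
    using Amazon.SageMaker;
    using Amazon.SageMaker.Model;

    class CreateNeuronEndpointConfig
    {
        static async Task Main()
        {
            var client = new AmazonSageMakerClient();

            await client.CreateEndpointConfigAsync(new CreateEndpointConfigRequest
            {
                EndpointConfigName = "my-neuron-endpoint-config", // assumed name
                ProductionVariants = new List<ProductionVariant>
                {
                    new ProductionVariant
                    {
                        VariantName = "AllTraffic",
                        ModelName = "my-neuron-model",            // assumed; must already exist
                        InstanceType = ProductionVariantInstanceType.MlInf2Xlarge, // assumed inf2 type
                        InitialInstanceCount = 1,
                        // The enum value added in this commit (Neuron driver 2.19):
                        InferenceAmiVersion = ProductionVariantInferenceAmiVersion.Al2AmiSagemakerInferenceNeuron2
                    }
                }
            });
        }
    }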

sdk/src/Services/SageMaker/Generated/Model/StoppingCondition.cs

Lines changed: 23 additions & 0 deletions
@@ -68,6 +68,29 @@ public partial class StoppingCondition
         /// The maximum length of time, in seconds, that a training or compilation job can be
         /// pending before it is stopped.
         /// </para>
+        /// <note>
+        /// <para>
+        /// When working with training jobs that use capacity from <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/reserve-capacity-with-training-plans.html">training
+        /// plans</a>, not all <c>Pending</c> job states count against the <c>MaxPendingTimeInSeconds</c>
+        /// limit. The following scenarios do not increment the <c>MaxPendingTimeInSeconds</c>
+        /// counter:
+        /// </para>
+        /// <ul> <li>
+        /// <para>
+        /// The plan is in a <c>Scheduled</c> state: Jobs queued (in <c>Pending</c> status) before
+        /// a plan's start date (waiting for scheduled start time)
+        /// </para>
+        /// </li> <li>
+        /// <para>
+        /// Between capacity reservations: Jobs temporarily back to <c>Pending</c> status between
+        /// two capacity reservation periods
+        /// </para>
+        /// </li> </ul>
+        /// <para>
+        /// <c>MaxPendingTimeInSeconds</c> only increments when jobs are actively waiting for
+        /// capacity in an <c>Active</c> plan.
+        /// </para>
+        /// </note>
         /// </summary>
         [AWSProperty(Min=7200, Max=2419200)]
         public int MaxPendingTimeInSeconds
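
A small sketch of where this cap is set. Per the note above, only time spent Pending while a plan is Active counts toward the limit; the surrounding CreateTrainingJob request fields are omitted here.

    using Amazon.SageMaker.Model;

    // Sketch: a StoppingCondition for a job backed by a training plan.
    // Time queued before the plan's start date does not count toward
    // MaxPendingTimeInSeconds; waiting inside an Active plan does.
    var stoppingCondition = new StoppingCondition
    {
        MaxRuntimeInSeconds = 86400,   // cap the running job at 24 hours
        MaxPendingTimeInSeconds = 7200 // minimum allowed value per AWSProperty(Min=7200)
    };
    // Assign to CreateTrainingJobRequest.StoppingCondition when creating the job.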

sdk/src/Services/SageMaker/Generated/ServiceEnumerations.cs

Lines changed: 4 additions & 0 deletions
@@ -14004,6 +14004,10 @@ public class ProductionVariantInferenceAmiVersion : ConstantClass
         /// Constant Al2AmiSagemakerInferenceGpu31 for ProductionVariantInferenceAmiVersion
         /// </summary>
         public static readonly ProductionVariantInferenceAmiVersion Al2AmiSagemakerInferenceGpu31 = new ProductionVariantInferenceAmiVersion("al2-ami-sagemaker-inference-gpu-3-1");
+        /// <summary>
+        /// Constant Al2AmiSagemakerInferenceNeuron2 for ProductionVariantInferenceAmiVersion
+        /// </summary>
+        public static readonly ProductionVariantInferenceAmiVersion Al2AmiSagemakerInferenceNeuron2 = new ProductionVariantInferenceAmiVersion("al2-ami-sagemaker-inference-neuron-2");

         /// <summary>
         /// This constant constructor does not need to be called if the constant
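
Like other generated constants, the new value should resolve through the standard ConstantClass helpers (FindValue and the implicit string conversion are long-standing ConstantClass behavior, not specific to this commit); a quick sketch:

    using System;
    using Amazon.SageMaker;

    var fromLookup = ProductionVariantInferenceAmiVersion.FindValue("al2-ami-sagemaker-inference-neuron-2");
    ProductionVariantInferenceAmiVersion fromString = "al2-ami-sagemaker-inference-neuron-2"; // implicit conversion

    // Both resolve to the constant added in this commit.
    Console.WriteLine(fromLookup.Value); // al2-ami-sagemaker-inference-neuron-2
    Console.WriteLine(fromString.Equals(ProductionVariantInferenceAmiVersion.Al2AmiSagemakerInferenceNeuron2)); // True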
