Skip to content

Commit 365a333

Browse files
author
AWS
committed
Amazon SageMaker Service Update: SageMaker Training Managed Warm Pools let you retain provisioned infrastructure to reduce latency for repetitive training workloads.
1 parent 7e1bb48 commit 365a333

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "Amazon SageMaker Service",
4+
"contributor": "",
5+
"description": "SageMaker Training Managed Warm Pools let you retain provisioned infrastructure to reduce latency for repetitive training workloads."
6+
}

services/sagemaker/src/main/resources/codegen-resources/service-2.json

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3097,7 +3097,7 @@
30973097
"errors":[
30983098
{"shape":"ResourceNotFound"}
30993099
],
3100-
"documentation":"<p>Update a model training job to request a new Debugger profiling configuration.</p>"
3100+
"documentation":"<p>Update a model training job to request a new Debugger profiling configuration or to change the warm pool retention length.</p>"
31013101
},
31023102
"UpdateTrial":{
31033103
"name":"UpdateTrial",
@@ -12068,6 +12068,10 @@
1206812068
"Environment":{
1206912069
"shape":"TrainingEnvironmentMap",
1207012070
"documentation":"<p>The environment variables to set in the Docker container.</p>"
12071+
},
12072+
"WarmPoolStatus":{
12073+
"shape":"WarmPoolStatus",
12074+
"documentation":"<p>The status of the warm pool associated with the training job.</p>"
1207112075
}
1207212076
}
1207312077
},
@@ -16126,6 +16130,11 @@
1612616130
},
1612716131
"documentation":"<p>The JupyterServer app settings.</p>"
1612816132
},
16133+
"KeepAlivePeriodInSeconds":{
16134+
"type":"integer",
16135+
"max":3600,
16136+
"min":0
16137+
},
1612916138
"KernelDisplayName":{
1613016139
"type":"string",
1613116140
"max":1024
@@ -19358,6 +19367,10 @@
1935819367
"SortOrder":{
1935919368
"shape":"SortOrder",
1936019369
"documentation":"<p>The sort order for results. The default is <code>Ascending</code>.</p>"
19370+
},
19371+
"WarmPoolStatusEquals":{
19372+
"shape":"WarmPoolResourceStatus",
19373+
"documentation":"<p>A filter that retrieves only training jobs with a specific warm pool status.</p>"
1936119374
}
1936219375
}
1936319376
},
@@ -24471,10 +24484,25 @@
2447124484
"InstanceGroups":{
2447224485
"shape":"InstanceGroups",
2447324486
"documentation":"<p>The configuration of a heterogeneous cluster in JSON format.</p>"
24487+
},
24488+
"KeepAlivePeriodInSeconds":{
24489+
"shape":"KeepAlivePeriodInSeconds",
24490+
"documentation":"<p>The duration of time in seconds to retain configured resources in a warm pool for subsequent training jobs.</p>"
2447424491
}
2447524492
},
2447624493
"documentation":"<p>Describes the resources, including ML compute instances and ML storage volumes, to use for model training. </p>"
2447724494
},
24495+
"ResourceConfigForUpdate":{
24496+
"type":"structure",
24497+
"required":["KeepAlivePeriodInSeconds"],
24498+
"members":{
24499+
"KeepAlivePeriodInSeconds":{
24500+
"shape":"KeepAlivePeriodInSeconds",
24501+
"documentation":"<p>The <code>KeepAlivePeriodInSeconds</code> value specified in the <code>ResourceConfig</code> to update.</p>"
24502+
}
24503+
},
24504+
"documentation":"<p>The <code>ResourceConfig</code> used to update <code>KeepAlivePeriodInSeconds</code>. Other fields in the <code>ResourceConfig</code> cannot be updated.</p>"
24505+
},
2447824506
"ResourceId":{
2447924507
"type":"string",
2448024508
"max":32
@@ -24532,6 +24560,10 @@
2453224560
"min":1,
2453324561
"pattern":".+"
2453424562
},
24563+
"ResourceRetainedBillableTimeInSeconds":{
24564+
"type":"integer",
24565+
"min":0
24566+
},
2453524567
"ResourceSpec":{
2453624568
"type":"structure",
2453724569
"members":{
@@ -26424,6 +26456,10 @@
2642426456
"TrainingJobStatus":{
2642526457
"shape":"TrainingJobStatus",
2642626458
"documentation":"<p>The status of the training job.</p>"
26459+
},
26460+
"WarmPoolStatus":{
26461+
"shape":"WarmPoolStatus",
26462+
"documentation":"<p>The status of the warm pool associated with the training job.</p>"
2642726463
}
2642826464
},
2642926465
"documentation":"<p>Provides summary information about a training job.</p>"
@@ -28009,6 +28045,10 @@
2800928045
"ProfilerRuleConfigurations":{
2801028046
"shape":"ProfilerRuleConfigurations",
2801128047
"documentation":"<p>Configuration information for Debugger rules for profiling system and framework metrics.</p>"
28048+
},
28049+
"ResourceConfig":{
28050+
"shape":"ResourceConfigForUpdate",
28051+
"documentation":"<p>The training job <code>ResourceConfig</code> used to update the warm pool retention length.</p>"
2801228052
}
2801328053
}
2801428054
},
@@ -28447,6 +28487,34 @@
2844728487
"max":3600,
2844828488
"min":0
2844928489
},
28490+
"WarmPoolResourceStatus":{
28491+
"type":"string",
28492+
"enum":[
28493+
"Available",
28494+
"Terminated",
28495+
"Reused",
28496+
"InUse"
28497+
]
28498+
},
28499+
"WarmPoolStatus":{
28500+
"type":"structure",
28501+
"required":["Status"],
28502+
"members":{
28503+
"Status":{
28504+
"shape":"WarmPoolResourceStatus",
28505+
"documentation":"<p>The status of the warm pool.</p> <ul> <li> <p> <code>InUse</code>: The warm pool is in use for the training job.</p> </li> <li> <p> <code>Available</code>: The warm pool is available to reuse for a matching training job.</p> </li> <li> <p> <code>Reused</code>: The warm pool moved to a matching training job for reuse.</p> </li> <li> <p> <code>Terminated</code>: The warm pool is no longer available. Warm pools are unavailable if they are terminated by a user, terminated for a patch update, or terminated for exceeding the specified <code>KeepAlivePeriodInSeconds</code>.</p> </li> </ul>"
28506+
},
28507+
"ResourceRetainedBillableTimeInSeconds":{
28508+
"shape":"ResourceRetainedBillableTimeInSeconds",
28509+
"documentation":"<p>The billable time in seconds used by the warm pool. Billable time refers to the absolute wall-clock time.</p> <p>Multiply <code>ResourceRetainedBillableTimeInSeconds</code> by the number of instances (<code>InstanceCount</code>) in your training cluster to get the total compute time SageMaker bills you if you run warm pool training. The formula is as follows: <code>ResourceRetainedBillableTimeInSeconds * InstanceCount</code>.</p>"
28510+
},
28511+
"ReusedByJob":{
28512+
"shape":"TrainingJobName",
28513+
"documentation":"<p>The name of the matching training job that reused the warm pool.</p>"
28514+
}
28515+
},
28516+
"documentation":"<p>Status and billing information about the warm pool.</p>"
28517+
},
2845028518
"Workforce":{
2845128519
"type":"structure",
2845228520
"required":[

0 commit comments

Comments
 (0)