Skip to content

Commit 10d7dda

Browse files
Adds support for GB200 UltraServers in Amazon SageMaker training jobs, training plans, and HyperPod clusters
1 parent d47a571 commit 10d7dda

File tree

62 files changed

+4581
-89
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+4581
-89
lines changed

generator/ServiceModels/sagemaker/sagemaker-2017-07-24.api.json

Lines changed: 216 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2183,6 +2183,18 @@
21832183
"input":{"shape":"DescribeProjectInput"},
21842184
"output":{"shape":"DescribeProjectOutput"}
21852185
},
2186+
"DescribeReservedCapacity":{
2187+
"name":"DescribeReservedCapacity",
2188+
"http":{
2189+
"method":"POST",
2190+
"requestUri":"/"
2191+
},
2192+
"input":{"shape":"DescribeReservedCapacityRequest"},
2193+
"output":{"shape":"DescribeReservedCapacityResponse"},
2194+
"errors":[
2195+
{"shape":"ResourceNotFound"}
2196+
]
2197+
},
21862198
"DescribeSpace":{
21872199
"name":"DescribeSpace",
21882200
"http":{
@@ -3215,6 +3227,18 @@
32153227
{"shape":"ResourceNotFound"}
32163228
]
32173229
},
3230+
"ListUltraServersByReservedCapacity":{
3231+
"name":"ListUltraServersByReservedCapacity",
3232+
"http":{
3233+
"method":"POST",
3234+
"requestUri":"/"
3235+
},
3236+
"input":{"shape":"ListUltraServersByReservedCapacityRequest"},
3237+
"output":{"shape":"ListUltraServersByReservedCapacityResponse"},
3238+
"errors":[
3239+
{"shape":"ResourceNotFound"}
3240+
]
3241+
},
32183242
"ListUserProfiles":{
32193243
"name":"ListUserProfiles",
32203244
"http":{
@@ -5722,6 +5746,11 @@
57225746
"box":true,
57235747
"min":0
57245748
},
5749+
"AvailableSpareInstanceCount":{
5750+
"type":"integer",
5751+
"box":true,
5752+
"min":0
5753+
},
57255754
"AwsManagedHumanLoopRequestSource":{
57265755
"type":"string",
57275756
"enum":[
@@ -7116,7 +7145,8 @@
71167145
"PrivateDnsHostname":{"shape":"ClusterPrivateDnsHostname"},
71177146
"Placement":{"shape":"ClusterInstancePlacement"},
71187147
"CurrentImageId":{"shape":"ImageId"},
7119-
"DesiredImageId":{"shape":"ImageId"}
7148+
"DesiredImageId":{"shape":"ImageId"},
7149+
"UltraServerInfo":{"shape":"UltraServerInfo"}
71207150
}
71217151
},
71227152
"ClusterNodeId":{
@@ -7174,7 +7204,8 @@
71747204
"InstanceType":{"shape":"ClusterInstanceType"},
71757205
"LaunchTime":{"shape":"Timestamp"},
71767206
"LastSoftwareUpdateTime":{"shape":"Timestamp"},
7177-
"InstanceStatus":{"shape":"ClusterInstanceStatusDetails"}
7207+
"InstanceStatus":{"shape":"ClusterInstanceStatusDetails"},
7208+
"UltraServerInfo":{"shape":"UltraServerInfo"}
71787209
}
71797210
},
71807211
"ClusterNonNegativeInstanceCount":{
@@ -7682,6 +7713,11 @@
76827713
"min":0,
76837714
"pattern":".*"
76847715
},
7716+
"ConfiguredSpareInstanceCount":{
7717+
"type":"integer",
7718+
"box":true,
7719+
"min":0
7720+
},
76857721
"ConflictException":{
76867722
"type":"structure",
76877723
"members":{
@@ -9367,6 +9403,7 @@
93679403
"members":{
93689404
"TrainingPlanName":{"shape":"TrainingPlanName"},
93699405
"TrainingPlanOfferingId":{"shape":"TrainingPlanOfferingId"},
9406+
"SpareInstanceCountPerUltraServer":{"shape":"SpareInstanceCountPerUltraServer"},
93709407
"Tags":{"shape":"TagList"}
93719408
}
93729409
},
@@ -12370,6 +12407,36 @@
1237012407
"LastModifiedBy":{"shape":"UserContext"}
1237112408
}
1237212409
},
12410+
"DescribeReservedCapacityRequest":{
12411+
"type":"structure",
12412+
"required":["ReservedCapacityArn"],
12413+
"members":{
12414+
"ReservedCapacityArn":{"shape":"ReservedCapacityArn"}
12415+
}
12416+
},
12417+
"DescribeReservedCapacityResponse":{
12418+
"type":"structure",
12419+
"required":[
12420+
"ReservedCapacityArn",
12421+
"InstanceType",
12422+
"TotalInstanceCount"
12423+
],
12424+
"members":{
12425+
"ReservedCapacityArn":{"shape":"ReservedCapacityArn"},
12426+
"ReservedCapacityType":{"shape":"ReservedCapacityType"},
12427+
"Status":{"shape":"ReservedCapacityStatus"},
12428+
"AvailabilityZone":{"shape":"AvailabilityZone"},
12429+
"DurationHours":{"shape":"ReservedCapacityDurationHours"},
12430+
"DurationMinutes":{"shape":"ReservedCapacityDurationMinutes"},
12431+
"StartTime":{"shape":"Timestamp"},
12432+
"EndTime":{"shape":"Timestamp"},
12433+
"InstanceType":{"shape":"ReservedCapacityInstanceType"},
12434+
"TotalInstanceCount":{"shape":"TotalInstanceCount"},
12435+
"AvailableInstanceCount":{"shape":"AvailableInstanceCount"},
12436+
"InUseInstanceCount":{"shape":"InUseInstanceCount"},
12437+
"UltraServerSummary":{"shape":"UltraServerSummary"}
12438+
}
12439+
},
1237312440
"DescribeSpaceRequest":{
1237412441
"type":"structure",
1237512442
"required":[
@@ -12534,6 +12601,9 @@
1253412601
"TotalInstanceCount":{"shape":"TotalInstanceCount"},
1253512602
"AvailableInstanceCount":{"shape":"AvailableInstanceCount"},
1253612603
"InUseInstanceCount":{"shape":"InUseInstanceCount"},
12604+
"UnhealthyInstanceCount":{"shape":"UnhealthyInstanceCount"},
12605+
"AvailableSpareInstanceCount":{"shape":"AvailableSpareInstanceCount"},
12606+
"TotalUltraServerCount":{"shape":"UltraServerCount"},
1253712607
"TargetResources":{"shape":"SageMakerResourceNames"},
1253812608
"ReservedCapacitySummaries":{"shape":"ReservedCapacitySummaries"}
1253912609
}
@@ -16315,6 +16385,16 @@
1631516385
"MinimumInstanceMetadataServiceVersion":{"shape":"MinimumInstanceMetadataServiceVersion"}
1631616386
}
1631716387
},
16388+
"InstancePlacementConfig":{
16389+
"type":"structure",
16390+
"members":{
16391+
"EnableMultipleJobs":{
16392+
"shape":"Boolean",
16393+
"box":true
16394+
},
16395+
"PlacementSpecifications":{"shape":"PlacementSpecifications"}
16396+
}
16397+
},
1631816398
"InstanceType":{
1631916399
"type":"string",
1632016400
"enum":[
@@ -18763,6 +18843,23 @@
1876318843
"NextToken":{"shape":"NextToken"}
1876418844
}
1876518845
},
18846+
"ListUltraServersByReservedCapacityRequest":{
18847+
"type":"structure",
18848+
"required":["ReservedCapacityArn"],
18849+
"members":{
18850+
"ReservedCapacityArn":{"shape":"ReservedCapacityArn"},
18851+
"MaxResults":{"shape":"MaxResults"},
18852+
"NextToken":{"shape":"NextToken"}
18853+
}
18854+
},
18855+
"ListUltraServersByReservedCapacityResponse":{
18856+
"type":"structure",
18857+
"required":["UltraServers"],
18858+
"members":{
18859+
"NextToken":{"shape":"NextToken"},
18860+
"UltraServers":{"shape":"UltraServers"}
18861+
}
18862+
},
1876618863
"ListUserProfilesRequest":{
1876718864
"type":"structure",
1876818865
"members":{
@@ -21863,6 +21960,23 @@
2186321960
"max":100,
2186421961
"min":0
2186521962
},
21963+
"PlacementSpecification":{
21964+
"type":"structure",
21965+
"required":["InstanceCount"],
21966+
"members":{
21967+
"UltraServerId":{"shape":"String256"},
21968+
"InstanceCount":{
21969+
"shape":"TrainingInstanceCount",
21970+
"box":true
21971+
}
21972+
}
21973+
},
21974+
"PlacementSpecifications":{
21975+
"type":"list",
21976+
"member":{"shape":"PlacementSpecification"},
21977+
"max":10,
21978+
"min":0
21979+
},
2186621980
"PlatformIdentifier":{
2186721981
"type":"string",
2186821982
"max":15,
@@ -23679,7 +23793,8 @@
2367923793
"ml.trn1.32xlarge",
2368023794
"ml.trn2.48xlarge",
2368123795
"ml.p6-b200.48xlarge",
23682-
"ml.p4de.24xlarge"
23796+
"ml.p4de.24xlarge",
23797+
"ml.p6e-gb200.36xlarge"
2368323798
]
2368423799
},
2368523800
"ReservedCapacityOffering":{
@@ -23689,6 +23804,9 @@
2368923804
"InstanceCount"
2369023805
],
2369123806
"members":{
23807+
"ReservedCapacityType":{"shape":"ReservedCapacityType"},
23808+
"UltraServerType":{"shape":"UltraServerType"},
23809+
"UltraServerCount":{"shape":"UltraServerCount"},
2369223810
"InstanceType":{"shape":"ReservedCapacityInstanceType"},
2369323811
"InstanceCount":{
2369423812
"shape":"ReservedCapacityInstanceCount",
@@ -23733,6 +23851,9 @@
2373323851
],
2373423852
"members":{
2373523853
"ReservedCapacityArn":{"shape":"ReservedCapacityArn"},
23854+
"ReservedCapacityType":{"shape":"ReservedCapacityType"},
23855+
"UltraServerType":{"shape":"UltraServerType"},
23856+
"UltraServerCount":{"shape":"UltraServerCount"},
2373623857
"InstanceType":{"shape":"ReservedCapacityInstanceType"},
2373723858
"TotalInstanceCount":{"shape":"TotalInstanceCount"},
2373823859
"Status":{"shape":"ReservedCapacityStatus"},
@@ -23743,6 +23864,13 @@
2374323864
"EndTime":{"shape":"Timestamp"}
2374423865
}
2374523866
},
23867+
"ReservedCapacityType":{
23868+
"type":"string",
23869+
"enum":[
23870+
"UltraServer",
23871+
"Instance"
23872+
]
23873+
},
2374623874
"ResolvedAttributes":{
2374723875
"type":"structure",
2374823876
"members":{
@@ -23819,7 +23947,8 @@
2381923947
"VolumeKmsKeyId":{"shape":"KmsKeyId"},
2382023948
"KeepAlivePeriodInSeconds":{"shape":"KeepAlivePeriodInSeconds"},
2382123949
"InstanceGroups":{"shape":"InstanceGroups"},
23822-
"TrainingPlanArn":{"shape":"TrainingPlanArn"}
23950+
"TrainingPlanArn":{"shape":"TrainingPlanArn"},
23951+
"InstancePlacementConfig":{"shape":"InstancePlacementConfig"}
2382323952
}
2382423953
},
2382523954
"ResourceConfigForUpdate":{
@@ -24397,6 +24526,8 @@
2439724526
"shape":"ReservedCapacityInstanceCount",
2439824527
"box":true
2439924528
},
24529+
"UltraServerType":{"shape":"UltraServerType"},
24530+
"UltraServerCount":{"shape":"UltraServerCount"},
2440024531
"StartTimeAfter":{"shape":"Timestamp"},
2440124532
"EndTimeBefore":{"shape":"Timestamp"},
2440224533
"DurationHours":{"shape":"TrainingPlanDurationHoursInput"},
@@ -24955,6 +25086,11 @@
2495525086
"EbsStorageSettings":{"shape":"EbsStorageSettings"}
2495625087
}
2495725088
},
25089+
"SpareInstanceCountPerUltraServer":{
25090+
"type":"integer",
25091+
"box":true,
25092+
"min":0
25093+
},
2495825094
"SpawnRate":{
2495925095
"type":"integer",
2496025096
"box":true,
@@ -26213,7 +26349,8 @@
2621326349
"ml.r7i.12xlarge",
2621426350
"ml.r7i.16xlarge",
2621526351
"ml.r7i.24xlarge",
26216-
"ml.r7i.48xlarge"
26352+
"ml.r7i.48xlarge",
26353+
"ml.p6e-gb200.36xlarge"
2621726354
]
2621826355
},
2621926356
"TrainingInstanceTypes":{
@@ -26526,6 +26663,7 @@
2652626663
"TotalInstanceCount":{"shape":"TotalInstanceCount"},
2652726664
"AvailableInstanceCount":{"shape":"AvailableInstanceCount"},
2652826665
"InUseInstanceCount":{"shape":"InUseInstanceCount"},
26666+
"TotalUltraServerCount":{"shape":"UltraServerCount"},
2652926667
"TargetResources":{"shape":"SageMakerResourceNames"},
2653026668
"ReservedCapacitySummaries":{"shape":"ReservedCapacitySummaries"}
2653126669
}
@@ -27186,6 +27324,79 @@
2718627324
"max":4000000,
2718727325
"min":10000
2718827326
},
27327+
"UltraServer":{
27328+
"type":"structure",
27329+
"required":[
27330+
"UltraServerId",
27331+
"UltraServerType",
27332+
"AvailabilityZone",
27333+
"InstanceType",
27334+
"TotalInstanceCount"
27335+
],
27336+
"members":{
27337+
"UltraServerId":{"shape":"NonEmptyString256"},
27338+
"UltraServerType":{"shape":"UltraServerType"},
27339+
"AvailabilityZone":{"shape":"AvailabilityZone"},
27340+
"InstanceType":{"shape":"ReservedCapacityInstanceType"},
27341+
"TotalInstanceCount":{"shape":"TotalInstanceCount"},
27342+
"ConfiguredSpareInstanceCount":{"shape":"ConfiguredSpareInstanceCount"},
27343+
"AvailableInstanceCount":{"shape":"AvailableInstanceCount"},
27344+
"InUseInstanceCount":{"shape":"InUseInstanceCount"},
27345+
"AvailableSpareInstanceCount":{"shape":"AvailableSpareInstanceCount"},
27346+
"UnhealthyInstanceCount":{"shape":"UnhealthyInstanceCount"},
27347+
"HealthStatus":{"shape":"UltraServerHealthStatus"}
27348+
}
27349+
},
27350+
"UltraServerCount":{
27351+
"type":"integer",
27352+
"box":true,
27353+
"min":1
27354+
},
27355+
"UltraServerHealthStatus":{
27356+
"type":"string",
27357+
"enum":[
27358+
"OK",
27359+
"Impaired",
27360+
"Insufficient-Data"
27361+
]
27362+
},
27363+
"UltraServerInfo":{
27364+
"type":"structure",
27365+
"members":{
27366+
"Id":{"shape":"String"}
27367+
}
27368+
},
27369+
"UltraServerSummary":{
27370+
"type":"structure",
27371+
"required":[
27372+
"UltraServerType",
27373+
"InstanceType"
27374+
],
27375+
"members":{
27376+
"UltraServerType":{"shape":"UltraServerType"},
27377+
"InstanceType":{"shape":"ReservedCapacityInstanceType"},
27378+
"UltraServerCount":{"shape":"UltraServerCount"},
27379+
"AvailableSpareInstanceCount":{"shape":"AvailableSpareInstanceCount"},
27380+
"UnhealthyInstanceCount":{"shape":"UnhealthyInstanceCount"}
27381+
}
27382+
},
27383+
"UltraServerType":{
27384+
"type":"string",
27385+
"max":64,
27386+
"min":1,
27387+
"pattern":"ml.[a-z0-9\\-.]+"
27388+
},
27389+
"UltraServers":{
27390+
"type":"list",
27391+
"member":{"shape":"UltraServer"},
27392+
"max":100,
27393+
"min":0
27394+
},
27395+
"UnhealthyInstanceCount":{
27396+
"type":"integer",
27397+
"box":true,
27398+
"min":0
27399+
},
2718927400
"UnifiedStudioDomainId":{
2719027401
"type":"string",
2719127402
"pattern":"dzd[-_][a-zA-Z0-9_-]{1,36}"

0 commit comments

Comments
 (0)