Skip to content

Commit b83ea99

Browse files
himani2411 and Himani Anil Deshpande authored
Add Validator to check OS Capacity Reservation Platform match (aws#6747)
Co-authored-by: Himani Anil Deshpande <[email protected]>
1 parent a0b68d2 commit b83ea99

File tree

6 files changed

+163
-19
lines changed

6 files changed

+163
-19
lines changed

cli/src/pcluster/aws/aws_resources.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,10 @@ def instance_type(self):
571571
"""Return the instance type associated to the Capacity Reservation."""
572572
return self.capacity_reservation_data.get("InstanceType")
573573

574+
def instance_platform(self):
575+
"""Return the instance platform associated to the Capacity Reservation."""
576+
return self.capacity_reservation_data.get("InstancePlatform")
577+
574578
def availability_zone(self):
575579
"""Return the availability zone associated to the Capacity Reservation."""
576580
return self.capacity_reservation_data.get("AvailabilityZone")

cli/src/pcluster/config/cluster_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3113,6 +3113,7 @@ def _register_validators(self, context: ValidatorContext = None): # noqa: C901
31133113
is_flexible=compute_resource.is_flexible(),
31143114
subnet=queue.networking.subnet_ids[0],
31153115
capacity_type=queue.capacity_type,
3116+
os=self.image.os,
31163117
)
31173118
self._register_validator(
31183119
CapacityReservationResourceGroupValidator,

cli/src/pcluster/constants.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,25 @@
4949
SLURM = "slurm"
5050
AWSBATCH = "awsbatch"
5151

52+
53+
# Capacity Reservation Platform types we support.
54+
CR_PLATFORM_LINUX_UNIX = "Linux/UNIX"
55+
CR_PLATFORM_UBUNTU_PRO = "Ubuntu Pro"
56+
CR_PLATFORM_RHEL = "Red Hat Enterprise Linux"
57+
58+
59+
CAPACITY_RESERVATION_OS_MAP = {
60+
"alinux2": CR_PLATFORM_LINUX_UNIX,
61+
"alinux2023": CR_PLATFORM_LINUX_UNIX,
62+
"ubuntu2004": CR_PLATFORM_UBUNTU_PRO,
63+
"ubuntu2204": CR_PLATFORM_UBUNTU_PRO,
64+
"ubuntu2404": CR_PLATFORM_UBUNTU_PRO,
65+
"rhel8": CR_PLATFORM_RHEL,
66+
"rocky8": CR_PLATFORM_RHEL,
67+
"rhel9": CR_PLATFORM_RHEL,
68+
"rocky9": CR_PLATFORM_RHEL,
69+
}
70+
5271
OS_MAPPING = {
5372
"alinux2": {"user": "ec2-user"},
5473
"alinux2023": {"user": "ec2-user"},

cli/src/pcluster/validators/ec2_validators.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919
from pcluster.aws.aws_resources import CapacityReservationInfo
2020
from pcluster.aws.common import AWSClientError
2121
from pcluster.config.common import CapacityType
22-
from pcluster.constants import NVIDIA_OPENRM_UNSUPPORTED_INSTANCE_TYPES, UNSUPPORTED_OSES_FOR_MICRO_NANO
22+
from pcluster.constants import (
23+
CAPACITY_RESERVATION_OS_MAP,
24+
NVIDIA_OPENRM_UNSUPPORTED_INSTANCE_TYPES,
25+
UNSUPPORTED_OSES_FOR_MICRO_NANO,
26+
)
2327
from pcluster.utils import get_resource_name_from_resource_arn
2428
from pcluster.validators.common import FailureLevel, Validator
2529

@@ -331,9 +335,18 @@ def _validate(
331335
is_flexible: bool,
332336
subnet: str,
333337
capacity_type: CapacityType,
338+
os,
334339
):
335340
if capacity_reservation_id:
336341
capacity_reservation = AWSApi.instance().ec2.describe_capacity_reservations([capacity_reservation_id])[0]
342+
cr_platform = capacity_reservation.instance_platform()
343+
if CAPACITY_RESERVATION_OS_MAP.get(os) != cr_platform:
344+
self._add_failure(
345+
f"Capacity reservation {capacity_reservation_id} has platform {cr_platform},"
346+
f" which is not compatible with the cluster OS {os}. "
347+
f"Please use a reservation with platform {CAPACITY_RESERVATION_OS_MAP.get(os)}.",
348+
FailureLevel.ERROR,
349+
)
337350

338351
if not instance_types:
339352
# If the instance type doesn't exist, this is an invalid config,

cli/tests/pcluster/validators/test_all_validators.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,20 +398,23 @@ def test_slurm_validators_are_called_with_correct_argument(test_datadir, mocker)
398398
call(
399399
capacity_reservation_id="cr-34567",
400400
instance_types=["t3.large"],
401+
os="alinux2",
401402
is_flexible=True,
402403
subnet="subnet-23456789",
403404
capacity_type=CapacityType.ONDEMAND,
404405
),
405406
call(
406407
capacity_reservation_id="cr-12345",
407408
instance_types=["t3.xlarge"],
409+
os="alinux2",
408410
is_flexible=True,
409411
subnet="subnet-23456789",
410412
capacity_type=CapacityType.CAPACITY_BLOCK,
411413
),
412414
call(
413415
capacity_reservation_id="cr-23456",
414416
instance_types=["t3.xlarge"],
417+
os="alinux2",
415418
is_flexible=False,
416419
subnet="subnet-23456789",
417420
capacity_type=CapacityType.CAPACITY_BLOCK,

cli/tests/pcluster/validators/test_ec2_validators.py

Lines changed: 122 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -708,109 +708,166 @@ def test_placement_group_validator(
708708
"is_flexible",
709709
"subnet_availability_zone",
710710
"capacity_type",
711+
"os",
711712
"expected_messages",
712713
),
713714
[
714715
(
715-
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a"}),
716+
CapacityReservationInfo(
717+
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "InstancePlatform": "Linux/UNIX"}
718+
),
716719
["c5.xlarge"],
717720
False,
718721
"us-east-1a",
719722
None,
723+
"alinux2",
720724
[],
721725
),
722726
# Wrong instance type
723727
(
724-
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1a"}),
728+
CapacityReservationInfo(
729+
{"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1a", "InstancePlatform": "Linux/UNIX"}
730+
),
725731
["c5.xlarge"],
726732
False,
727733
"us-east-1a",
728734
CapacityType.ONDEMAND,
735+
"alinux2023",
729736
["Capacity reservation .* must have the same instance type as c5.xlarge."],
730737
),
731738
# Wrong availability zone
732739
(
733-
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1b"}),
740+
CapacityReservationInfo(
741+
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1b", "InstancePlatform": "Linux/UNIX"}
742+
),
734743
["c5.xlarge"],
735744
False,
736745
"us-east-1a",
737746
CapacityType.SPOT,
738-
["Capacity reservation .* must use the same availability zone as subnet"],
747+
"ubuntu2404",
748+
[
749+
"Capacity reservation .* has platform Linux/UNIX, which is not compatible with "
750+
+ "the cluster OS ubuntu2404. Please use a reservation with platform Ubuntu Pro.",
751+
"Capacity reservation .* must use the same availability zone as subnet",
752+
],
739753
),
740754
# Both instance type and availability zone are wrong
741755
(
742-
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
756+
CapacityReservationInfo(
757+
{"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b", "InstancePlatform": "Ubuntu Pro"}
758+
),
743759
["c5.xlarge"],
744760
False,
745761
"us-east-1a",
746762
CapacityType.ONDEMAND,
763+
"ubuntu2204",
747764
[
748765
"Capacity reservation .* must have the same instance type as c5.xlarge.",
749766
"Capacity reservation .* must use the same availability zone as subnet",
750767
],
751768
),
752769
(
753-
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
770+
CapacityReservationInfo(
771+
{
772+
"InstanceType": "m5.xlarge",
773+
"AvailabilityZone": "us-east-1b",
774+
"InstancePlatform": "Red Hat Enterprise Linux",
775+
}
776+
),
754777
["c5.xlarge"],
755778
False,
756779
"us-east-1a",
757780
CapacityType.SPOT,
758-
["Capacity reservation .* must use the same availability zone as subnet"],
781+
"ubuntu2004",
782+
[
783+
"Capacity reservation .* has platform Red Hat Enterprise Linux, which is not "
784+
+ "compatible with the cluster OS ubuntu2004. Please use a reservation with platform Ubuntu Pro.",
785+
"Capacity reservation .* must use the same availability zone as subnet",
786+
],
759787
),
760788
# empty instance type, this should not happen because instance type is automatically retrieved when using cr-id
761789
(
762-
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
790+
CapacityReservationInfo(
791+
{"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b", "InstancePlatform": "Linux/UNIX"}
792+
),
763793
None,
764794
False,
765795
"us-east-1a",
766796
CapacityType.ONDEMAND,
797+
"alinux2",
767798
[
768799
"Unexpected failure. InstanceType parameter cannot be empty when using CapacityReservationId",
769800
"Capacity reservation .* must use the same availability zone as subnet",
770801
],
771802
),
772803
# empty instance type, this should not happen because instance type is automatically retrieved when using cr-id
773804
(
774-
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
805+
CapacityReservationInfo(
806+
{
807+
"InstanceType": "m5.xlarge",
808+
"AvailabilityZone": "us-east-1b",
809+
"InstancePlatform": "Red Hat Enterprise Linux",
810+
}
811+
),
775812
"",
776813
False,
777814
"us-east-1a",
778815
CapacityType.SPOT,
816+
"rocky9",
779817
[
780818
"Unexpected failure. InstanceType parameter cannot be empty when using CapacityReservationId",
781819
"Capacity reservation .* must use the same availability zone as subnet",
782820
],
783821
),
784822
# wrong capacity type
785823
(
786-
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a"}),
824+
CapacityReservationInfo(
825+
{
826+
"InstanceType": "c5.xlarge",
827+
"AvailabilityZone": "us-east-1a",
828+
"InstancePlatform": "Red Hat Enterprise Linux",
829+
}
830+
),
787831
["c5.xlarge"],
788832
False,
789833
"us-east-1a",
790834
CapacityType.CAPACITY_BLOCK,
835+
"rocky8",
791836
[
792837
"Capacity reservation cr-123 is not a Capacity Block reservation. "
793838
"It cannot be used when specifying CapacityType: CAPACITY_BLOCK."
794839
],
795840
),
796841
(
797842
CapacityReservationInfo(
798-
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "capacity-block"}
843+
{
844+
"InstanceType": "c5.xlarge",
845+
"AvailabilityZone": "us-east-1a",
846+
"ReservationType": "capacity-block",
847+
"InstancePlatform": "Ubuntu Pro",
848+
}
799849
),
800850
["c5.xlarge"],
801851
False,
802852
"us-east-1a",
803853
CapacityType.ONDEMAND,
854+
"ubuntu2004",
804855
[], # Do not check Ondemand capacity type
805856
),
806857
(
807858
CapacityReservationInfo(
808-
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "ondemand"}
859+
{
860+
"InstanceType": "c5.xlarge",
861+
"AvailabilityZone": "us-east-1a",
862+
"ReservationType": "ondemand",
863+
"InstancePlatform": "Linux/UNIX",
864+
}
809865
),
810866
["c5.xlarge"],
811867
False,
812868
"us-east-1a",
813869
CapacityType.CAPACITY_BLOCK,
870+
"alinux2",
814871
[
815872
"Capacity reservation cr-123 is not a Capacity Block reservation. "
816873
"It cannot be used when specifying CapacityType: CAPACITY_BLOCK."
@@ -819,43 +876,88 @@ def test_placement_group_validator(
819876
# right capacity type
820877
(
821878
CapacityReservationInfo(
822-
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "ondemand"}
879+
{
880+
"InstanceType": "c5.xlarge",
881+
"AvailabilityZone": "us-east-1a",
882+
"ReservationType": "ondemand",
883+
"InstancePlatform": "Linux/UNIX",
884+
}
823885
),
824886
["c5.xlarge"],
825887
False,
826888
"us-east-1a",
827889
CapacityType.ONDEMAND,
890+
"alinux2023",
828891
[],
829892
),
830893
(
831894
CapacityReservationInfo(
832-
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "capacity-block"}
895+
{
896+
"InstanceType": "c5.xlarge",
897+
"AvailabilityZone": "us-east-1a",
898+
"ReservationType": "capacity-block",
899+
"InstancePlatform": "Linux/UNIX",
900+
}
833901
),
834902
["c5.xlarge"],
835903
False,
836904
"us-east-1a",
837905
CapacityType.CAPACITY_BLOCK,
906+
"alinux2",
838907
[],
839908
),
909+
(
910+
CapacityReservationInfo(
911+
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "InstancePlatform": "SUSE Linux"}
912+
),
913+
["c5.xlarge"],
914+
False,
915+
"us-east-1a",
916+
CapacityType.ONDEMAND,
917+
"alinux2",
918+
[
919+
"Capacity reservation .* has platform SUSE Linux, which is not compatible"
920+
+ " with the cluster OS alinux2. Please use a reservation with platform Linux/UNIX."
921+
],
922+
),
840923
# Flexible instance type, with a single instance and capacity_reservation_id
841924
(
842-
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a"}),
925+
CapacityReservationInfo(
926+
{
927+
"InstanceType": "c5.xlarge",
928+
"AvailabilityZone": "us-east-1a",
929+
"InstancePlatform": "Linux/UNIX with HA",
930+
}
931+
),
843932
["c5.xlarge"],
844933
True,
845934
"us-east-1a",
846935
None,
847-
["CapacityReservationId parameter cannot be used with Instances parameter."],
936+
"alinux2023",
937+
[
938+
"Capacity reservation .* has platform Linux/UNIX with HA, which is not compatible "
939+
+ "with the cluster OS alinux2023. Please use a reservation with platform Linux/UNIX.",
940+
"CapacityReservationId parameter cannot be used with Instances parameter.",
941+
],
848942
),
849943
# Flexible instance type with multiple instance types and capacity_reservation_id
850944
(
851945
CapacityReservationInfo(
852-
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "ondemand"}
946+
{
947+
"InstanceType": "c5.xlarge",
948+
"AvailabilityZone": "us-east-1a",
949+
"ReservationType": "ondemand",
950+
"InstancePlatform": "Ubuntu Pro",
951+
}
853952
),
854953
["c5.xlarge", "m5.2xlarge"],
855954
True,
856955
"us-east-1a",
857956
CapacityType.ONDEMAND,
858-
["CapacityReservationId parameter cannot be used with Instances parameter."],
957+
"ubuntu2004",
958+
[
959+
"CapacityReservationId parameter cannot be used with Instances parameter.",
960+
],
859961
),
860962
],
861963
)
@@ -866,6 +968,7 @@ def test_capacity_reservation_validator(
866968
subnet_availability_zone,
867969
is_flexible,
868970
capacity_type,
971+
os,
869972
expected_messages,
870973
):
871974
mock_aws_api(mocker)
@@ -877,6 +980,7 @@ def test_capacity_reservation_validator(
877980
is_flexible=is_flexible,
878981
subnet="subnet-123",
879982
capacity_type=capacity_type,
983+
os=os,
880984
)
881985
assert_failure_messages(actual_failures, expected_messages)
882986

0 commit comments

Comments
 (0)