Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@ CHANGELOG
- Add validator that warns against the downsides of disabling in-place updates on compute and login nodes through DevSettings.
- Upgrade jmespath to ~=1.0 (from ~=0.10).
- Upgrade tabulate to <=0.9.0 (from <=0.8.10).
- Add support for p6-b300 instances for all OSs except AL2.
- Add permission `cloudwatch:PutMetricData` to the head node policy so that clustermgtd is able to emit metrics.
- Add alarm on missing clustermgtd heartbeat.

5 changes: 5 additions & 0 deletions cli/src/pcluster/constants.py
@@ -39,6 +39,8 @@
UNSUPPORTED_OSES_FOR_MICRO_NANO = ["ubuntu2204", "ubuntu2404", "rhel8", "rocky8", "rhel9", "rocky9"]
UNSUPPORTED_OSES_FOR_P6E_GB200 = ["rhel8", "rocky8", "alinux2", "rhel9"]
SUPPORTED_OSES_FOR_P6E_GB200 = list(set(SUPPORTED_OSES) - set(UNSUPPORTED_OSES_FOR_P6E_GB200))
UNSUPPORTED_OSES_FOR_P6_B300 = ["alinux2"]
SUPPORTED_OSES_FOR_P6_B300 = list(set(SUPPORTED_OSES) - set(UNSUPPORTED_OSES_FOR_P6_B300))
UNSUPPORTED_OSES_FOR_DCV = []
UNSUPPORTED_OSES_FOR_NON_GPU_DCV = ["rocky9", "rhel9"]
UNSUPPORTED_ARM_OSES_FOR_DCV = []
@@ -352,6 +354,9 @@ class Operation(Enum):
PCLUSTER_BUILD_IMAGE_CLEANUP_ROLE_BOOTSTRAP_TAG_KEY = "parallelcluster:build-image-cleanup-role-bootstrapped"

P6E_GB200 = "p6e-gb200"
P6_B300 = "p6-b300"
@gmarciani (Contributor) commented on Jan 27, 2026:

When we introduced GB200 we did it for a limited set of OSes. Are we sure we do not have any OS limitation for B300? For instance, according to the documentation for NVIDIA 580, AL2 is not mentioned as a supported OS: https://docs.nvidia.com/datacenter/tesla/tesla-release-notes-580-126-09/index.html

The PR author (Contributor Author) replied:

I added a validator to prevent al2 and b300 from being used together.
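
For context, a minimal sketch of what the new constants and the validator check in this PR amount to. The SUPPORTED_OSES list below is an assumption for illustration only; the real definitions live in cli/src/pcluster/constants.py and cli/src/pcluster/validators/ec2_validators.py in this diff.

```python
# Minimal sketch, not the production code: derive the supported-OS list for
# p6-b300 and reject alinux2 when it is paired with a p6-b300 instance type.
SUPPORTED_OSES = ["alinux2", "alinux2023", "ubuntu2204", "ubuntu2404", "rhel8", "rhel9", "rocky8", "rocky9"]  # assumed
UNSUPPORTED_OSES_FOR_P6_B300 = ["alinux2"]
SUPPORTED_OSES_FOR_P6_B300 = list(set(SUPPORTED_OSES) - set(UNSUPPORTED_OSES_FOR_P6_B300))


def validate_p6_b300_os(instance_type: str, os: str) -> str | None:
    """Return an error message when a p6-b300 instance type is combined with an unsupported OS."""
    if instance_type.startswith("p6-b300") and os in UNSUPPORTED_OSES_FOR_P6_B300:
        return (
            f"The instance type {instance_type} is not supported with OS {os}."
            f" Please use one of the following OS: {SUPPORTED_OSES_FOR_P6_B300}"
        )
    return None
```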

INSTANCE_TYPES_WITH_FIRST_INTERFACE_ENA = [P6E_GB200, P6_B300]

ULTRASERVER_INSTANCE_PREFIX_LIST = [P6E_GB200]
# Dictionary mapping ultraserver instance prefixes to their allowed capacity block sizes
ULTRASERVER_CAPACITY_BLOCK_ALLOWED_SIZE_DICT = {
25 changes: 19 additions & 6 deletions cli/src/pcluster/templates/queues_stack.py
@@ -11,8 +11,10 @@
from pcluster.config.common import DefaultUserHomeType, SharedStorageType
from pcluster.constants import (
DEFAULT_EPHEMERAL_DIR,
INSTANCE_TYPES_WITH_FIRST_INTERFACE_ENA,
NODE_BOOTSTRAP_TIMEOUT,
OS_MAPPING,
P6_B300,
P6E_GB200,
PCLUSTER_COMPUTE_RESOURCE_NAME_TAG,
PCLUSTER_QUEUE_NAME_TAG,
@@ -368,9 +370,12 @@ def add_network_interfaces(
queue_lt_security_groups,
):
"""Generate launch template network interfaces list."""
is_gb200 = compute_resource.instance_types[0].split(".")[0] == P6E_GB200
instance_family = compute_resource.instance_types[0].split(".")[0]
is_gb200 = instance_family == P6E_GB200
is_b300 = instance_family == P6_B300
efa_enabled = compute_resource.efa and compute_resource.efa.enabled
interface_type = "efa" if efa_enabled and not is_gb200 else None
# gb200 and b300 instances require the first interface to be ENA even if EFA is enabled
interface_type = "efa" if efa_enabled and instance_family not in INSTANCE_TYPES_WITH_FIRST_INTERFACE_ENA else None

compute_lt_nw_interfaces = [
ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
@@ -390,10 +395,18 @@
if is_gb200 and not efa_enabled and not even:
continue

interface_type = "efa" if efa_enabled else None
# if efa is enabled with a gb200 instance, even indexes are configured as efa and the odd as efa-only
if is_gb200 and efa_enabled:
interface_type = "efa" if even else "efa-only"
if efa_enabled:
if is_b300:
# if efa is enabled with a b300 instance, all network cards, except for the primary,
# are configured as efa-only
interface_type = "efa-only"
elif is_gb200:
# if efa is enabled with a gb200 instance, even indexes are configured as efa and the odd as efa-only
interface_type = "efa" if even else "efa-only"
else:
interface_type = "efa"
else:
interface_type = None

compute_lt_nw_interfaces.append(
ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
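To make the branching in this hunk easier to follow, here is a condensed, standalone restatement of the interface-type selection. The helper name select_interface_type is hypothetical; this is a sketch of the rules the diff implements, not the actual template builder.

```python
def select_interface_type(instance_family: str, efa_enabled: bool, network_card_index: int) -> str | None:
    """Condensed restatement of the launch-template logic above (sketch only).

    p6e-gb200 and p6-b300 require the first network interface to be ENA even when
    EFA is enabled, so the primary card never gets interface_type="efa".
    """
    first_iface_ena = instance_family in ("p6e-gb200", "p6-b300")
    if network_card_index == 0:
        return "efa" if efa_enabled and not first_iface_ena else None
    if not efa_enabled:
        return None  # note: gb200 without EFA also skips odd cards entirely (not modeled here)
    if instance_family == "p6-b300":
        return "efa-only"  # every non-primary card is EFA-only on b300
    if instance_family == "p6e-gb200":
        return "efa" if network_card_index % 2 == 0 else "efa-only"  # even cards: efa, odd: efa-only
    return "efa"
```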
9 changes: 9 additions & 0 deletions cli/src/pcluster/validators/ec2_validators.py
@@ -23,10 +23,13 @@
CAPACITY_BLOCK_INACTIVE_STATES,
CAPACITY_RESERVATION_OS_MAP,
NVIDIA_OPENRM_UNSUPPORTED_INSTANCE_TYPES,
P6_B300,
SUPPORTED_OSES_FOR_P6_B300,
SUPPORTED_OSES_FOR_P6E_GB200,
ULTRASERVER_CAPACITY_BLOCK_ALLOWED_SIZE_DICT,
ULTRASERVER_INSTANCE_PREFIX_LIST,
UNSUPPORTED_OSES_FOR_MICRO_NANO,
UNSUPPORTED_OSES_FOR_P6_B300,
UNSUPPORTED_OSES_FOR_P6E_GB200,
)
from pcluster.utils import get_needed_ultraserver_capacity_block_statuses, get_resource_name_from_resource_arn
@@ -211,6 +214,12 @@ def _validate(self, instance_type: str, os: str):
" Please use one of the following OS: {2}".format(instance_type, os, SUPPORTED_OSES_FOR_P6E_GB200),
FailureLevel.ERROR,
)
if instance_type.startswith(P6_B300) and os in UNSUPPORTED_OSES_FOR_P6_B300:
self._add_failure(
"The instance type {0} is not supported with OS {1}."
" Please use one of the following OS: {2}".format(instance_type, os, SUPPORTED_OSES_FOR_P6_B300),
FailureLevel.ERROR,
)


class KeyPairValidator(Validator):
22 changes: 22 additions & 0 deletions cli/tests/pcluster/templates/test_queues_stack.py
@@ -221,6 +221,28 @@ def maximum_network_interfaces(self):
{"network_card_index": 2, "interface_type": None, "device_index": 1},
],
),
(
True,
"p6-b300.WHATEVER_SIZE",
[NetworkCard(0, 4), NetworkCard(1, 4), NetworkCard(2, 4), NetworkCard(3, 4), NetworkCard(4, 4)],
[
{"network_card_index": 0, "interface_type": None, "device_index": 0},
{"network_card_index": 1, "interface_type": "efa-only", "device_index": 1},
{"network_card_index": 2, "interface_type": "efa-only", "device_index": 1},
{"network_card_index": 3, "interface_type": "efa-only", "device_index": 1},
{"network_card_index": 4, "interface_type": "efa-only", "device_index": 1},
],
),
(
False,
"p6-b300.WHATEVER_SIZE",
[NetworkCard(0, 4), NetworkCard(1, 4), NetworkCard(2, 4)],
[
{"network_card_index": 0, "interface_type": None, "device_index": 0},
{"network_card_index": 1, "interface_type": None, "device_index": 1},
{"network_card_index": 2, "interface_type": None, "device_index": 1},
],
),
],
)
def test_add_compute_resource_launch_template(
11 changes: 11 additions & 0 deletions cli/tests/pcluster/validators/test_ec2_validators.py
@@ -461,6 +461,17 @@ def test_instance_type_base_ami_compatible_validator(
"rocky9",
None,
),
(
"p6-b300.WHATEVER_SIZE",
"alinux2",
"The instance type p6-b300.WHATEVER_SIZE is not supported with OS alinux2. "
"Please use one of the following OS",
),
(
"p6-b300.WHATEVER_SIZE",
"alinux2023",
None,
),
],
)
def test_instance_type_os_compatible_validator(instance_type, os, expected_message):
@@ -6,7 +6,7 @@ rm -rf /shared/${1}

module load ${1}
NCCL_BENCHMARKS_VERSION='2.17.1'
NCCL_VERSION='2.28.3-1'
NCCL_VERSION='2.28.9-1'
MPI_HOME=$(which mpirun | awk -F '/bin' '{print $1}')
NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90,code=sm_90 -gencode=arch=compute_90,code=compute_90" # Arch for NVIDIA A100 and H100, ref https://docs.nvidia.com/cuda/ada-compatibility-guide/index.html

@@ -28,8 +28,8 @@ NVCC_GENCODE="${NVCC_GENCODE}" make MPI=1 MPI_HOME=${MPI_HOME} NCCL_HOME=/shared

# Compile OFI NCCL plugin for RHEL and Rocky because EFA doesn't ship the plugin on the OSes
. /etc/os-release
if [[ $ID==rhel || $ID==rocky ]]; then
OFI_NCCL_VERSION='1.16.3'
if [[ "$ID" == "rhel" || "$ID" == "rocky" ]]; then
OFI_NCCL_VERSION='1.18.0'
wget https://github.com/aws/aws-ofi-nccl/archive/v${OFI_NCCL_VERSION}.tar.gz
tar xvfz v${OFI_NCCL_VERSION}.tar.gz
cd aws-ofi-nccl-${OFI_NCCL_VERSION}
@@ -3,7 +3,7 @@
#SBATCH --exclusive

module load openmpi
NCCL_VERSION='2.28.3-1'
NCCL_VERSION='2.28.9-1'
NCCL_BENCHMARKS_VERSION='2.17.1'

. /etc/os-release
1 change: 1 addition & 0 deletions tests/integration-tests/tests/common/nccl_common.py
@@ -71,6 +71,7 @@ def install_and_run_nccl_benchmarks(remote_command_executor, mpi_module, schedul
# p5.48xlarge - Expected "in-place busbw" bandwidth with 2 nodes, 8 tasks per node is about 250GB/s
"p5.48xlarge": 250.0,
"p6-b200.48xlarge": 570, # Initial testing performance 631.17
"p6-b300.48xlarge": 675, # Initial testing performance 698.7
"p6e-gb200.36xlarge": 650, # Initial testing performance 719.17
}

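The new p6-b300 entry extends the expected-bandwidth table; roughly, the measured benchmark result is compared against it as sketched below. The helper check_nccl_busbw is hypothetical, not the actual test code; the values come from the dict in the diff.

```python
# Hypothetical check against the per-instance-type expected "in-place busbw" (GB/s).
EXPECTED_BUSBW_GBPS = {
    "p5.48xlarge": 250.0,
    "p6-b200.48xlarge": 570,    # initial testing measured ~631.17
    "p6-b300.48xlarge": 675,    # initial testing measured ~698.7
    "p6e-gb200.36xlarge": 650,  # initial testing measured ~719.17
}


def check_nccl_busbw(instance_type: str, measured_gbps: float) -> None:
    """Fail if the measured NCCL bus bandwidth is below the expected minimum."""
    expected = EXPECTED_BUSBW_GBPS[instance_type]
    assert measured_gbps >= expected, (
        f"{instance_type}: measured busbw {measured_gbps} GB/s < expected {expected} GB/s"
    )
```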
11 changes: 7 additions & 4 deletions tests/integration-tests/tests/efa/test_efa.py
@@ -61,11 +61,13 @@ def test_efa(
# when the instance type is available in open capacity pool
head_node_instance = instance
max_queue_size = 2
p6_b200_capacity_reservation_id = None
if instance == "p6-b200.48xlarge":
capacity_reservation_id = None
# p family instances need capacity blocks and so placement group is set to false
placement_group_enabled = not instance.startswith("p")
if instance in ("p6-b200.48xlarge", "p6-b300.48xlarge"):
capacity_reservations_ids = get_capacity_reservation_id(request, instance, region, max_queue_size, os)
if capacity_reservations_ids:
p6_b200_capacity_reservation_id = capacity_reservations_ids[0].get("CapacityReservationId")
capacity_reservation_id = capacity_reservations_ids[0].get("CapacityReservationId")
else:
message = f"Skipping the test as no Capacity Block for {instance} and os {os} was found in {region}"
logging.warn(message)
@@ -75,7 +77,8 @@
cluster_config = pcluster_config_reader(
head_node_instance=head_node_instance,
max_queue_size=max_queue_size,
p6_b200_capacity_reservation_id=p6_b200_capacity_reservation_id,
capacity_reservation_id=capacity_reservation_id,
placement_group_enabled=placement_group_enabled,
)
cluster = clusters_factory(cluster_config)
remote_command_executor = RemoteCommandExecutor(cluster)
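The test setup above boils down to: p-family instances run in Capacity Blocks (so the placement group is disabled), and the test is skipped when no reservation is available. A condensed sketch follows; the helper name resolve_capacity_reservation and the pytest.skip call are assumptions (the diff truncates right after the warning), while get_capacity_reservation_id is the helper actually used in the diff (import path omitted).

```python
import logging

import pytest  # assumed: the integration tests are pytest-based


def resolve_capacity_reservation(request, instance, region, max_queue_size, os):
    """Return a CapacityReservationId for p6-b200/p6-b300 instances, or None otherwise."""
    if instance not in ("p6-b200.48xlarge", "p6-b300.48xlarge"):
        return None
    reservations = get_capacity_reservation_id(request, instance, region, max_queue_size, os)  # from the diff
    if not reservations:
        message = f"Skipping the test as no Capacity Block for {instance} and os {os} was found in {region}"
        logging.warning(message)
        pytest.skip(message)  # assumed skip behavior
    return reservations[0].get("CapacityReservationId")


# p-family instances need Capacity Blocks, so the placement group is disabled for them:
# placement_group_enabled = not instance.startswith("p")
```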
@@ -8,7 +8,7 @@ set -ex
FABTESTS_DIR="$1"

FABTESTS_REPO="https://github.com/ofiwg/libfabric.git"
FABTESTS_VERSION="2.3.0"
FABTESTS_VERSION="2.4.0"
FABTESTS_SOURCES_DIR="$FABTESTS_DIR/sources"
LIBFABRIC_DIR="/opt/amazon/efa"
CUDA_DIR="/usr/local/cuda"
@@ -13,14 +13,14 @@ Scheduling:
Scheduler: {{ scheduler }}
SlurmQueues:
- Name: efa-enabled
{% if p6_b200_capacity_reservation_id %}
{% if capacity_reservation_id %}
CapacityType: CAPACITY_BLOCK
CapacityReservationTarget:
CapacityReservationId: {{ p6_b200_capacity_reservation_id }}
CapacityReservationId: {{ capacity_reservation_id }}
{% endif %}
Networking:
PlacementGroup:
Enabled: {% if instance not in ["p4d.24xlarge", "p6-b200.48xlarge"] %}true{% else %}false{% endif %}
Enabled: {{ placement_group_enabled }}
{% if instance in ["c5n.18xlarge", "c6gn.16xlarge"] %}Name: {{ capacity_reservation_framework_placement_group }}{% endif %}
SubnetIds:
- {{ private_subnet_id }}