Skip to content

Commit 8c9c7b7

Browse files
author
Himani Anil Deshpande
committed
[Bug] Use subnet hash for ELB target group so that a new target group is created whenever we update the LoginNode's subnetID
1 parent f08eb02 commit 8c9c7b7

File tree

5 files changed

+171
-12
lines changed

5 files changed

+171
-12
lines changed

cli/src/pcluster/templates/login_nodes_stack.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,9 +404,10 @@ def _get_launching_lifecycle_hook_specification(self):
404404
)
405405

406406
def _add_login_nodes_pool_target_group(self):
407+
subnet_hash = create_hash_suffix(self._pool.networking.subnet_ids[0])[:8]
407408
return elbv2.NetworkTargetGroup(
408409
self,
409-
f"{self._pool.name}TargetGroup",
410+
f"{self._pool.name}TargetGroup{subnet_hash}",
410411
health_check=elbv2.HealthCheck(
411412
port="22",
412413
protocol=elbv2.Protocol.TCP,
@@ -415,11 +416,14 @@ def _add_login_nodes_pool_target_group(self):
415416
protocol=elbv2.Protocol.TCP,
416417
target_type=elbv2.TargetType.INSTANCE,
417418
vpc=self._vpc,
419+
# AWS Target Group name limit is 32 characters.
420+
# With 3 args: (7 chars * 3) + (2 hyphens) + (1 hyphen) + (8 char hash) = 32 chars
418421
target_group_name=_get_resource_combination_name(
419422
self._config.cluster_name,
420423
self._pool.name,
424+
subnet_hash,
421425
partial_length=7,
422-
hash_length=16,
426+
hash_length=8,
423427
),
424428
)
425429

cli/tests/pcluster/templates/test_cluster_stack.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,7 @@ def test_login_nodes_traffic_management_resources_values_properties(
876876

877877
auto_scaling_group_id = "clusternametestloginnodespool1clusternametestloginnodespool1AutoScalingGroup5EBA3937"
878878
load_balancer_id = "clusternametestloginnodespool1testloginnodespool1LoadBalancerE1D4FCC7"
879-
target_group_id = "clusternametestloginnodespool1testloginnodespool1TargetGroup713F5EC5"
879+
target_group_id = "clusternametestloginnodespool1testloginnodespool1TargetGroup4592cff378B1099C"
880880
listener_id = (
881881
"clusternametestloginnodespool1testloginnodespool1LoadBalancerLoginNodesListenertestloginnodespool165B4D3DC"
882882
)
@@ -1357,19 +1357,18 @@ def test_custom_munge_key_iam_policy(mocker, test_datadir, config_file_name):
13571357

13581358

13591359
@pytest.mark.parametrize(
1360-
"resource_name_1, resource_name_2, partial_length, hash_length, expected_combination_name",
1360+
"resource_names, partial_length, hash_length, expected_combination_name",
13611361
[
1362-
("test-cluster", "test-pool", 7, 16, "test-cl-test-po-18c74b16dfbc78ac"),
1363-
("abcdefghijk", "lmnopqrst", 8, 14, "abcdefgh-lmnopqrs-dd65eea0329dcb"),
1364-
("a", "b", 7, 16, "a-b-fb8e20fc2e4c3f24"),
1362+
(["test-cluster", "test-pool"], 7, 16, "test-cl-test-po-18c74b16dfbc78ac"),
1363+
(["abcdefghijk", "lmnopqrst"], 8, 14, "abcdefgh-lmnopqrs-dd65eea0329dcb"),
1364+
(["a", "b"], 7, 16, "a-b-fb8e20fc2e4c3f24"),
1365+
# Test with 3 resource names (like target group: cluster_name, pool_name, subnet_hash)
1366+
(["clustername", "loginpool", "092512032a1df0b2c"], 7, 8, "cluster-loginpo-0925120-fde21041"),
13651367
],
13661368
)
1367-
def test_resource_combination_name(
1368-
resource_name_1, resource_name_2, partial_length, hash_length, expected_combination_name
1369-
):
1369+
def test_resource_combination_name(resource_names, partial_length, hash_length, expected_combination_name):
13701370
combination_name = _get_resource_combination_name(
1371-
resource_name_1,
1372-
resource_name_2,
1371+
*resource_names,
13731372
partial_length=partial_length,
13741373
hash_length=hash_length,
13751374
)

cli/tests/pcluster/templates/test_login_nodes_stack.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,86 @@ def render_join(elem: dict):
7878
else:
7979
raise ValueError("Found unsupported item type while rendering Fn::Join")
8080
return sep.join(rendered_body)
81+
82+
83+
@pytest.mark.parametrize(
    "config_file_name, keep_original_subnet",
    [
        ("config-1.yaml", True),
        ("config-2.yaml", False),
    ],
)
def test_target_group_with_config_files(mocker, test_datadir, config_file_name, keep_original_subnet):
    """
    Check that the login-node target group's logical ID and name track the pool subnet.

    Verifies that:
    1. the target group logical ID and name are generated at all,
    2. changing the subnet changes both the logical ID and the target group name,
    3. the target group name never exceeds the AWS 32-character limit.

    This guarantees that a cluster update which swaps the subnet produces a brand-new
    target group, avoiding "target group cannot be associated with more than one load
    balancer" and "target group name already exists" errors.
    """
    mock_aws_api(mocker)
    mock_bucket_object_utils(mocker)

    def get_target_group_info(cdk_assets):
        """Return (logical ID, name) of the first target group found in the CDK assets, else (None, None)."""
        for entry in cdk_assets:
            body = entry.get("content")
            if not body or "Resources" not in body:
                continue
            for res_id, res in body["Resources"].items():
                if res.get("Type") == "AWS::ElasticLoadBalancingV2::TargetGroup":
                    return res_id, res["Properties"]["Name"]
        return None, None

    def build_template(config_yaml):
        """Synthesize the cluster template from a config dict and return its CDK assets."""
        loaded_config = ClusterSchema(cluster_name="clustername").load(config_yaml)
        _, assets = CDKTemplateBuilder().build_cluster_template(
            cluster_config=loaded_config, bucket=dummy_cluster_bucket(), stack_name="clustername"
        )
        return assets

    def assert_target_group_info(logical_id, tg_name):
        """Assert both identifiers exist and the name fits the AWS 32-character limit."""
        assert_that(logical_id).is_not_none()
        assert_that(tg_name).is_not_none()
        assert_that(len(tg_name)).is_less_than_or_equal_to(32)

    # Load the cluster config and remember the subnet it starts with.
    input_yaml = load_yaml_dict(test_datadir / config_file_name)
    original_subnet = input_yaml["LoginNodes"]["Pools"][0]["Networking"]["SubnetIds"][0]

    # Synthesize once with the original subnet and sanity-check the target group.
    first_logical_id, first_tg_name = get_target_group_info(build_template(input_yaml))
    assert_target_group_info(first_logical_id, first_tg_name)

    # Either keep the subnet (and use a pool count of 1) or swap in a new subnet
    # (with a pool count of 0), matching the two parametrized scenarios.
    if keep_original_subnet:
        replacement_subnet, replacement_count = original_subnet, 1
    else:
        replacement_subnet, replacement_count = "subnet-12345678901234567", 0
    input_yaml["LoginNodes"]["Pools"][0]["Networking"]["SubnetIds"][0] = replacement_subnet
    input_yaml["LoginNodes"]["Pools"][0]["Count"] = replacement_count

    # Synthesize again with the (possibly) updated subnet.
    second_logical_id, second_tg_name = get_target_group_info(build_template(input_yaml))

    if keep_original_subnet:
        # Unchanged subnet must keep both identifiers stable across updates.
        assert_that(second_logical_id).is_equal_to(first_logical_id)
        assert_that(second_tg_name).is_equal_to(first_tg_name)
    else:
        # A subnet change must rotate both the logical ID and the target group name.
        assert_that(second_logical_id).is_not_equal_to(first_logical_id)
        assert_that(second_tg_name).is_not_equal_to(first_tg_name)

    # The regenerated target group must also be well-formed.
    assert_target_group_info(second_logical_id, second_tg_name)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
DeploymentSettings:
2+
DisableSudoAccessForDefaultUser: True
3+
DefaultUserHome: Local
4+
Region: us-east-1
5+
Image:
6+
Os: alinux2
7+
LoginNodes:
8+
Pools:
9+
- Name: login
10+
InstanceType: t3.micro
11+
Count: 1
12+
Networking:
13+
SubnetIds:
14+
- subnet-12345678
15+
Dcv:
16+
Enabled: true
17+
Port: 8444
18+
HeadNode:
19+
InstanceType: t3.micro
20+
Networking:
21+
SubnetId: subnet-12345678
22+
Ssh:
23+
KeyName: ec2-key-name
24+
Scheduling:
25+
Scheduler: slurm
26+
SlurmQueues:
27+
- Name: queue1
28+
ComputeResources:
29+
- Name: cr1
30+
InstanceType: c4.xlarge
31+
MinCount: 0
32+
MaxCount: 10
33+
Networking:
34+
SubnetIds:
35+
- subnet-12345678
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
Region: us-east-1
2+
Image:
3+
Os: alinux2
4+
LoginNodes:
5+
Pools:
6+
- Name: login
7+
InstanceType: t3.micro
8+
Count: 1
9+
Networking:
10+
SubnetIds:
11+
- subnet-12345678
12+
Dcv:
13+
Enabled: false
14+
HeadNode:
15+
InstanceType: t3.micro
16+
Networking:
17+
SubnetId: subnet-12345678
18+
Ssh:
19+
KeyName: ec2-key-name
20+
Scheduling:
21+
Scheduler: slurm
22+
SlurmQueues:
23+
- Name: queue1
24+
ComputeResources:
25+
- Name: cr1
26+
InstanceType: c4.xlarge
27+
MinCount: 0
28+
MaxCount: 10
29+
Networking:
30+
SubnetIds:
31+
- subnet-12345678
32+
DirectoryService:
33+
DomainName: corp.pcluster.com
34+
DomainAddr: ldaps://corp.pcluster.com
35+
PasswordSecretArn: arn:aws:secretsmanager:eu-west-1:XXXXXXXXXXXX:secret:XXXXXXXXXX
36+
DomainReadOnlyUser: cn=ReadOnlyUser,ou=Users,ou=CORP,dc=corp,dc=pcluster,dc=com
37+
LdapTlsReqCert: never
38+
GenerateSshKeysForUsers: true

0 commit comments

Comments
 (0)