Skip to content

Commit 4657dfc

Browse files
committed
[BugFix] Fix issue making job fail when submitted as active directory user from login nodes.
Signed-off-by: Giacomo Marciani <[email protected]>
1 parent 51b2ec1 commit 4657dfc

File tree

10 files changed

+202
-10
lines changed

10 files changed

+202
-10
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ This file is used to list changes made in each version of the AWS ParallelCluste
3232
- web_viewer: `2023.1.16388-1`
3333

3434
**BUG FIXES**
35+
- Fix issue making job fail when submitted as active directory user from login nodes.
36+
The issue was caused by an incomplete configuration of the integration with the external Active Directory on the head node.
37+
This fix comes with a breaking change: now cluster creation/update would fail if the integration with the Active Directory does not work.
3538

3639
3.8.0
3740
------

cookbooks/aws-parallelcluster-entrypoints/recipes/finalize.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@
2121
end
2222

2323
include_recipe "aws-parallelcluster-platform::finalize"
24+
include_recipe "aws-parallelcluster-environment::finalize"
2425

2526
include_recipe 'aws-parallelcluster-slurm::finalize' if node['cluster']['scheduler'] == 'slurm'
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
require 'chefspec'
2+
require 'chefspec/berkshelf'
3+
4+
require_relative '../../aws-parallelcluster-shared/spec/spec_helper'
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright:: 2024 Amazon.com, Inc. and its affiliates. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
6+
# License. A copy of the License is located at
7+
#
8+
# http://aws.amazon.com/apache2.0/
9+
#
10+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
11+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
require 'spec_helper'
15+
16+
describe 'aws-parallelcluster-entrypoints::finalize' do
17+
for_all_oses do |platform, version|
18+
context "on #{platform}#{version}" do
19+
for_all_node_types do |node_type|
20+
context "when #{node_type}" do
21+
cached(:chef_run) do
22+
runner = runner(platform: platform, version: version) do |node|
23+
allow_any_instance_of(Object).to receive(:fetch_config).and_return(OpenStruct.new)
24+
allow_any_instance_of(Object).to receive(:is_custom_node?).and_return(true)
25+
26+
node.override['cluster']['node_type'] = node_type
27+
node.override['cluster']['scheduler'] = 'slurm'
28+
end
29+
runner.converge(described_recipe)
30+
end
31+
cached(:node) { chef_run.node }
32+
33+
%w(
34+
aws-parallelcluster-platform::enable_chef_error_handler
35+
aws-parallelcluster-computefleet::custom_parallelcluster_node
36+
aws-parallelcluster-platform::finalize aws-parallelcluster-environment::finalize
37+
aws-parallelcluster-slurm::finalize
38+
).each do |recipe_name|
39+
it "includes the recipe #{recipe_name}" do
40+
# TODO: This assertion requires to refactor all the resources having properties
41+
# aws_region and aws_domain because they are overwriting existing methods
42+
# defined in the aws-parallelcluster-shared cookbook, making the test compilation to fail.
43+
# We must re-enable this assertion once the refactoring has been done.
44+
# is_expected.to include_recipe(recipe_name)
45+
end
46+
end
47+
end
48+
end
49+
end
50+
end
51+
end

cookbooks/aws-parallelcluster-environment/recipes/config/directory_service.rb

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -233,13 +233,3 @@
233233
sensitive true
234234
end
235235
end unless on_docker?
236-
237-
if %w(HeadNode LoginNode).include? node['cluster']['node_type']
238-
read_only_user = domain_service_read_only_user_name(node['cluster']['directory_service']['domain_read_only_user'])
239-
240-
execute 'Check AD connection and sync user data with remote directory service' do
241-
command "getent passwd #{read_only_user}"
242-
user 'root'
243-
ignore_failure true
244-
end
245-
end
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
6+
# License. A copy of the License is located at
7+
#
8+
# http://aws.amazon.com/apache2.0/
9+
#
10+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
11+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
include_recipe 'aws-parallelcluster-environment::finalize_directory_service'
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# frozen_string_literal: true
2+
3+
#
4+
# Cookbook:: aws-parallelcluster
5+
# Recipe:: finalize_directory_service
6+
#
7+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
8+
#
9+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
10+
# License. A copy of the License is located at
11+
#
12+
# http://aws.amazon.com/apache2.0/
13+
#
14+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
15+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
return if node['cluster']["directory_service"]["enabled"] == 'false'
19+
20+
if %w(HeadNode LoginNode).include? node['cluster']['node_type']
21+
default_user = node['cluster']['cluster_user']
22+
read_only_user = domain_service_read_only_user_name(node['cluster']['directory_service']['domain_read_only_user'])
23+
24+
execute 'Fetch user data from remote directory service' do
25+
# The switch-user (sudo -u) is necessary to trigger the fetching of AD data
26+
command "sudo -u #{default_user} getent passwd #{read_only_user}"
27+
user 'root'
28+
retries 10 # Retries are just a safe guard in case the node is still fetching data from the AD
29+
retry_delay 3
30+
end
31+
end
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright:: 2024 Amazon.com, Inc. and its affiliates. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
6+
# License. A copy of the License is located at
7+
#
8+
# http://aws.amazon.com/apache2.0/
9+
#
10+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
11+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
require 'spec_helper'
15+
16+
describe 'aws-parallelcluster-environment::finalize_directory_service' do
17+
for_all_oses do |platform, version|
18+
context "on #{platform}#{version}" do
19+
for_all_node_types do |node_type|
20+
cluster_user = 'DEFAULT_CLUSTER_USER'
21+
domain_read_only_user = 'DOMAIN_READ_ONLY_USER'
22+
23+
context "when #{node_type}" do
24+
cached(:chef_run) do
25+
runner = runner(platform: platform, version: version) do |node|
26+
node.override['cluster']['node_type'] = node_type
27+
node.override['cluster']['cluster_user'] = cluster_user
28+
node.override['cluster']['directory_service']['enabled'] = true
29+
30+
allow_any_instance_of(Object).to receive(:domain_service_read_only_user_name).and_return(domain_read_only_user)
31+
end
32+
runner.converge(described_recipe)
33+
end
34+
cached(:node) { chef_run.node }
35+
36+
if %(HeadNode LoginNode).include?(node_type)
37+
it 'fetches user data from remote directory service' do
38+
is_expected.to run_execute('Fetch user data from remote directory service').with(
39+
command: "sudo -u #{cluster_user} getent passwd #{domain_read_only_user}",
40+
user: 'root',
41+
retries: 10,
42+
retry_delay: 3
43+
)
44+
end
45+
else
46+
it 'fetches user data from remote directory service' do
47+
is_expected.not_to run_execute('Fetch user data from remote directory service')
48+
end
49+
end
50+
end
51+
end
52+
end
53+
end
54+
end
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright:: 2024 Amazon.com, Inc. and its affiliates. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
6+
# License. A copy of the License is located at
7+
#
8+
# http://aws.amazon.com/apache2.0/
9+
#
10+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
11+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
require 'spec_helper'
15+
16+
describe 'aws-parallelcluster-environment::finalize' do
17+
for_all_oses do |platform, version|
18+
context "on #{platform}#{version}" do
19+
for_all_node_types do |node_type|
20+
context "when #{node_type}" do
21+
cached(:chef_run) do
22+
runner = runner(platform: platform, version: version) do |node|
23+
node.override['cluster']['node_type'] = node_type
24+
end
25+
runner.converge(described_recipe)
26+
end
27+
cached(:node) { chef_run.node }
28+
29+
["aws-parallelcluster-environment::finalize_directory_service"].each do |recipe_name|
30+
it "includes the recipe #{recipe_name}" do
31+
is_expected.to include_recipe(recipe_name)
32+
end
33+
end
34+
end
35+
end
36+
end
37+
end
38+
end

cookbooks/aws-parallelcluster-shared/spec/spec_helper.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ def for_all_oses
5151
end
5252
end
5353

54+
def for_all_node_types
55+
%w(HeadNode ComputeFleet LoginNode).each do |node_type|
56+
yield(node_type)
57+
end
58+
end
59+
5460
def runner(platform:, version:, step_into: [])
5561
ChefSpec::SoloRunner.new(platform: platform, version: version, step_into: step_into) do |node|
5662
yield node if block_given?

0 commit comments

Comments
 (0)