Skip to content

Commit 6c68565

Browse files
committed
[DRAFT] Pyxis fix.
Signed-off-by: Giacomo Marciani <[email protected]>
1 parent 0963973 commit 6c68565

File tree

15 files changed

+159
-106
lines changed

15 files changed

+159
-106
lines changed

cookbooks/aws-parallelcluster-platform/attributes/platform.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
# ArmPL
1010
default['conditions']['arm_pl_supported'] = arm_instance?
1111

12-
# Enroot + Pyxis
12+
# Enroot
1313
default['cluster']['enroot']['version'] = '3.4.1'
14-
default['cluster']['pyxis']['version'] = '0.20.0'
14+
default['cluster']['enroot']['temporary_dir'] = '/run/enroot'
15+
default['cluster']['enroot']['persistent_dir'] = '/var/enroot'
1516

1617
# NVidia
1718
default['cluster']['nvidia']['enabled'] = 'no'

cookbooks/aws-parallelcluster-platform/recipes/config.rb

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,3 @@
2626
include_recipe 'aws-parallelcluster-platform::supervisord_config'
2727
fetch_config 'Fetch and load cluster configs'
2828
include_recipe 'aws-parallelcluster-platform::config_login' if node['cluster']['node_type'] == 'LoginNode'
29-
enroot 'Configure Enroot' do
30-
action :configure
31-
end

cookbooks/aws-parallelcluster-platform/recipes/install/directories.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
directory node['cluster']['license_dir']
2222
directory node['cluster']['configs_dir']
2323
directory node['cluster']['shared_dir']
24+
directory node['cluster']['examples_dir']
2425
directory node['cluster']['shared_dir_login_nodes']
2526

2627
# Create ParallelCluster log folder

cookbooks/aws-parallelcluster-platform/resources/enroot/partial/_enroot_common.rb

Lines changed: 24 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# frozen_string_literal: true
22
#
3-
# Copyright:: 2013-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License").
66
# You may not use this file except in compliance with the License.
@@ -18,50 +18,36 @@
1818
action :setup do
1919
return if on_docker?
2020
action_install_package
21-
end
22-
23-
action :configure do
24-
return if on_docker?
25-
return unless enroot_installed
2621

27-
cookbook_file "/tmp/enroot.template.conf" do
28-
source 'enroot/enroot.template.conf'
29-
cookbook 'aws-parallelcluster-platform'
22+
template "/etc/enroot/enroot.conf" do
23+
source 'enroot/enroot.conf.erb'
3024
owner 'root'
3125
group 'root'
32-
mode '0755'
33-
action :create_if_missing
26+
mode '0644'
3427
end
3528

36-
bash "Configure enroot" do
37-
user 'root'
38-
code <<-ENROOT_CONFIGURE
39-
set -e
40-
ENROOT_CONFIG_RELEASE=pyxis
41-
SHARED_DIR=#{node['cluster']['shared_dir']}
42-
NONROOT_USER=#{node['cluster']['cluster_user']}
43-
mkdir -p ${SHARED_DIR}/enroot
44-
chown ${NONROOT_USER} ${SHARED_DIR}/enroot
45-
ENROOT_CACHE_PATH=${SHARED_DIR}/enroot envsubst < /tmp/enroot.template.conf > /tmp/enroot.conf
46-
mv /tmp/enroot.conf /etc/enroot/enroot.conf
47-
chmod 0644 /etc/enroot/enroot.conf
48-
49-
mkdir -p /tmp/enroot
50-
chmod 1777 /tmp/enroot
51-
mkdir -p /tmp/enroot/data
52-
chmod 1777 /tmp/enroot/data
53-
54-
chmod 1777 ${SHARED_DIR}/enroot
29+
directory node['cluster']['enroot']['persistent_dir'] do
30+
owner 'root'
31+
group 'root'
32+
mode '1777'
33+
recursive true
34+
end
5535

56-
mkdir -p ${SHARED_DIR}/pyxis/
57-
chown ${NONROOT_USER} ${SHARED_DIR}/pyxis/
58-
sed -i '${s/$/ runtime_path=${SHARED_DIR}\\/pyxis/}' /opt/slurm/etc/plugstack.conf.d/pyxis.conf
59-
SHARED_DIR=${SHARED_DIR} envsubst < /opt/slurm/etc/plugstack.conf.d/pyxis.conf > /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf
60-
mv /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf
36+
directory node['cluster']['enroot']['temporary_dir'] do
37+
owner 'root'
38+
group 'root'
39+
mode '1777'
40+
recursive true
41+
end
6142

62-
ENROOT_CONFIGURE
63-
retries 3
64-
retry_delay 5
43+
# We assume the Enroot temporary dir to be a temporary folder in /run.
44+
# Folders in /run must be defined in /usr/lib/tmpfiles.d, otherwise they get
45+
# deleted on node boot.
46+
template "/usr/lib/tmpfiles.d/enroot.conf" do
47+
source 'enroot/tmpfiles/enroot.conf.erb'
48+
owner 'root'
49+
group 'root'
50+
mode '0644'
6551
end
6652
end
6753

cookbooks/aws-parallelcluster-platform/spec/unit/resources/enroot_spec.rb

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,6 @@ def self.setup(chef_run)
99
end
1010
end
1111
end
12-
13-
def self.configure(chef_run)
14-
chef_run.converge_dsl('aws-parallelcluster-platform') do
15-
enroot 'configure' do
16-
action :configure
17-
end
18-
end
19-
end
2012
end
2113

2214
describe 'enroot:package_version' do
@@ -128,44 +120,3 @@ def self.configure(chef_run)
128120
end
129121
end
130122
end
131-
132-
describe 'enroot:configure' do
133-
for_all_oses do |platform, version|
134-
context "on #{platform}#{version}" do
135-
let(:chef_run) do
136-
runner(platform: platform, version: version, step_into: ['enroot'])
137-
end
138-
139-
context 'when enroot is installed' do
140-
before do
141-
stubs_for_provider('enroot') do |resource|
142-
allow(resource).to receive(:enroot_installed).and_return(true)
143-
end
144-
ConvergeEnroot.configure(chef_run)
145-
end
146-
it 'run configure enroot script' do
147-
is_expected.to run_bash('Configure enroot')
148-
.with(retries: 3)
149-
.with(retry_delay: 5)
150-
.with(user: 'root')
151-
end
152-
end
153-
154-
context 'when enroot is not installed' do
155-
before do
156-
stubs_for_provider('enroot') do |resource|
157-
allow(resource).to receive(:enroot_installed).and_return(false)
158-
end
159-
ConvergeEnroot.configure(chef_run)
160-
end
161-
162-
it 'does not run configure enroot script' do
163-
is_expected.not_to run_bash('Configure enroot')
164-
.with(retries: 3)
165-
.with(retry_delay: 5)
166-
.with(user: 'root')
167-
end
168-
end
169-
end
170-
end
171-
end

cookbooks/aws-parallelcluster-platform/files/enroot/enroot.template.conf renamed to cookbooks/aws-parallelcluster-platform/templates/enroot/enroot.conf.erb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#ENROOT_LIBRARY_PATH /usr/lib/enroot
22
#ENROOT_SYSCONF_PATH /etc/enroot
3-
ENROOT_RUNTIME_PATH /tmp/enroot/user-$(id -u)
4-
ENROOT_CONFIG_PATH ${ENROOT_CONFIG_PATH}
5-
ENROOT_CACHE_PATH ${ENROOT_CACHE_PATH}
6-
ENROOT_DATA_PATH /tmp/enroot/data/user-$(id -u)
3+
ENROOT_RUNTIME_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/runtime/user-$(id -u)
4+
ENROOT_DATA_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/data/user-$(id -u)
5+
ENROOT_CONFIG_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/config/user-$(id -u)
6+
ENROOT_CACHE_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/cache/group-$(id -g)
77
#ENROOT_TEMP_PATH ${TMPDIR:-/tmp}
88

99
# Gzip program used to uncompress digest layers.
@@ -68,4 +68,4 @@ ENROOT_RESTRICT_DEV no
6868
#all_proxy
6969
#no_proxy
7070
#http_proxy
71-
#https_proxy
71+
#https_proxy
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Mode carries the sticky bit (1777) to match the mode the enroot setup action sets on this directory; systemd-tmpfiles applies the mode declared here.
D <%= node['cluster']['enroot']['temporary_dir'] %> 1777 root root

cookbooks/aws-parallelcluster-platform/test/controls/enroot_spec.rb

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,37 @@
1414

1515
expected_enroot_version = node['cluster']['enroot']['version']
1616

17-
describe "gdrcopy version is expected to be #{expected_enroot_version}" do
17+
describe "enroot version is expected to be #{expected_enroot_version}" do
1818
subject { command('enroot version').stdout.strip() }
1919
it { should eq expected_enroot_version }
2020
end
21+
22+
base_dir1 = "/etc/enroot"
23+
etc_dirs = [ base_dir1, "#{base_dir1}/enroot-cache"]
24+
25+
etc_dirs.each do |path|
26+
describe directory(path) do
27+
it { should exist }
28+
its('mode') { should cmp '01777' }
29+
its('owner') { should eq 'root' }
30+
its('group') { should eq 'root' }
31+
end
32+
end
33+
34+
base_dir2 = "/run/enroot"
35+
tmp_dirs = [ base_dir2, "#{base_dir2}/data" ]
36+
tmp_dirs.each do |path|
37+
describe directory(path) do
38+
it { should exist }
39+
its('mode') { should cmp '01777' }
40+
end
41+
end
2142
end
2243

2344
control 'tag:config_enroot_enabled_on_graphic_instances' do
2445
only_if { !os_properties.on_docker? && ['yes', true].include?(node['cluster']['nvidia']['enabled']) }
2546

26-
describe file("/opt/parallelcluster/shared/enroot") do
47+
describe file("/etc/enroot/enroot-cache") do
2748
it { should exist }
2849
its('group') { should eq 'root' }
2950
end unless os_properties.redhat_on_docker?

cookbooks/aws-parallelcluster-shared/attributes/cluster.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
default['cluster']['license_dir'] = "#{node['cluster']['base_dir']}/licenses"
55
default['cluster']['configs_dir'] = "#{node['cluster']['base_dir']}/configs"
66
default['cluster']['shared_dir'] = "#{node['cluster']['base_dir']}/shared"
7+
default['cluster']['examples_dir'] = "#{node['cluster']['base_dir']}/examples"
78
default['cluster']['shared_dir_login_nodes'] = "#{node['cluster']['base_dir']}/shared_login_nodes"
89
default['cluster']['log_base_dir'] = '/var/log/parallelcluster'
910
default['cluster']['etc_dir'] = '/etc/parallelcluster'

cookbooks/aws-parallelcluster-slurm/attributes/slurm_attributes.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,10 @@
1818

1919
# Slurmdbd
2020
default['cluster']['slurmdbd_service_enabled'] = "true"
21+
22+
# Spank
23+
default['cluster']['slurm']['spank_config_dir'] = "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d"
24+
25+
# Pyxis
26+
default['cluster']['pyxis']['version'] = '0.20.0'
27+
default['cluster']['pyxis']['runtime_path'] = '/run/pyxis'

0 commit comments

Comments
 (0)