Skip to content

Commit cdaf93a

Browse files
committed
[DRAFT] Pyxis fix.
Signed-off-by: Giacomo Marciani <[email protected]>
1 parent 0963973 commit cdaf93a

File tree

13 files changed

+134
-108
lines changed

13 files changed

+134
-108
lines changed

cookbooks/aws-parallelcluster-platform/attributes/platform.rb

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,12 @@
99
# ArmPL
1010
default['conditions']['arm_pl_supported'] = arm_instance?
1111

12-
# Enroot + Pyxis
12+
# Enroot
1313
default['cluster']['enroot']['version'] = '3.4.1'
14-
default['cluster']['pyxis']['version'] = '0.20.0'
14+
default['cluster']['enroot']['local_dir'] = '/var/enroot'
15+
# default['cluster']['enroot']['local_dir'] = '/run/enroot'
16+
17+
1518

1619
# NVidia
1720
default['cluster']['nvidia']['enabled'] = 'no'

cookbooks/aws-parallelcluster-platform/recipes/config.rb

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,3 @@
2626
include_recipe 'aws-parallelcluster-platform::supervisord_config'
2727
fetch_config 'Fetch and load cluster configs'
2828
include_recipe 'aws-parallelcluster-platform::config_login' if node['cluster']['node_type'] == 'LoginNode'
29-
enroot 'Configure Enroot' do
30-
action :configure
31-
end

cookbooks/aws-parallelcluster-platform/recipes/install/directories.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
directory node['cluster']['license_dir']
2222
directory node['cluster']['configs_dir']
2323
directory node['cluster']['shared_dir']
24+
directory node['cluster']['examples_dir']
2425
directory node['cluster']['shared_dir_login_nodes']
2526

2627
# Create ParallelCluster log folder

cookbooks/aws-parallelcluster-platform/resources/enroot/partial/_enroot_common.rb

Lines changed: 9 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# frozen_string_literal: true
22
#
3-
# Copyright:: 2013-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License").
66
# You may not use this file except in compliance with the License.
@@ -18,50 +18,19 @@
1818
action :setup do
1919
return if on_docker?
2020
action_install_package
21-
end
22-
23-
action :configure do
24-
return if on_docker?
25-
return unless enroot_installed
2621

27-
cookbook_file "/tmp/enroot.template.conf" do
28-
source 'enroot/enroot.template.conf'
29-
cookbook 'aws-parallelcluster-platform'
22+
template "/etc/enroot/enroot.conf" do
23+
source 'enroot/enroot.conf.erb'
3024
owner 'root'
3125
group 'root'
32-
mode '0755'
33-
action :create_if_missing
26+
mode '0644'
3427
end
3528

36-
bash "Configure enroot" do
37-
user 'root'
38-
code <<-ENROOT_CONFIGURE
39-
set -e
40-
ENROOT_CONFIG_RELEASE=pyxis
41-
SHARED_DIR=#{node['cluster']['shared_dir']}
42-
NONROOT_USER=#{node['cluster']['cluster_user']}
43-
mkdir -p ${SHARED_DIR}/enroot
44-
chown ${NONROOT_USER} ${SHARED_DIR}/enroot
45-
ENROOT_CACHE_PATH=${SHARED_DIR}/enroot envsubst < /tmp/enroot.template.conf > /tmp/enroot.conf
46-
mv /tmp/enroot.conf /etc/enroot/enroot.conf
47-
chmod 0644 /etc/enroot/enroot.conf
48-
49-
mkdir -p /tmp/enroot
50-
chmod 1777 /tmp/enroot
51-
mkdir -p /tmp/enroot/data
52-
chmod 1777 /tmp/enroot/data
53-
54-
chmod 1777 ${SHARED_DIR}/enroot
55-
56-
mkdir -p ${SHARED_DIR}/pyxis/
57-
chown ${NONROOT_USER} ${SHARED_DIR}/pyxis/
58-
sed -i '${s/$/ runtime_path=${SHARED_DIR}\\/pyxis/}' /opt/slurm/etc/plugstack.conf.d/pyxis.conf
59-
SHARED_DIR=${SHARED_DIR} envsubst < /opt/slurm/etc/plugstack.conf.d/pyxis.conf > /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf
60-
mv /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf
61-
62-
ENROOT_CONFIGURE
63-
retries 3
64-
retry_delay 5
29+
directory node['cluster']['enroot']['local_dir'] do
30+
owner 'root'
31+
group 'root'
32+
mode '1777'
33+
recursive true
6534
end
6635
end
6736

cookbooks/aws-parallelcluster-platform/spec/unit/resources/enroot_spec.rb

Lines changed: 0 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -9,14 +9,6 @@ def self.setup(chef_run)
99
end
1010
end
1111
end
12-
13-
def self.configure(chef_run)
14-
chef_run.converge_dsl('aws-parallelcluster-platform') do
15-
enroot 'configure' do
16-
action :configure
17-
end
18-
end
19-
end
2012
end
2113

2214
describe 'enroot:package_version' do
@@ -128,44 +120,3 @@ def self.configure(chef_run)
128120
end
129121
end
130122
end
131-
132-
describe 'enroot:configure' do
133-
for_all_oses do |platform, version|
134-
context "on #{platform}#{version}" do
135-
let(:chef_run) do
136-
runner(platform: platform, version: version, step_into: ['enroot'])
137-
end
138-
139-
context 'when enroot is installed' do
140-
before do
141-
stubs_for_provider('enroot') do |resource|
142-
allow(resource).to receive(:enroot_installed).and_return(true)
143-
end
144-
ConvergeEnroot.configure(chef_run)
145-
end
146-
it 'run configure enroot script' do
147-
is_expected.to run_bash('Configure enroot')
148-
.with(retries: 3)
149-
.with(retry_delay: 5)
150-
.with(user: 'root')
151-
end
152-
end
153-
154-
context 'when enroot is not installed' do
155-
before do
156-
stubs_for_provider('enroot') do |resource|
157-
allow(resource).to receive(:enroot_installed).and_return(false)
158-
end
159-
ConvergeEnroot.configure(chef_run)
160-
end
161-
162-
it 'does not run configure enroot script' do
163-
is_expected.not_to run_bash('Configure enroot')
164-
.with(retries: 3)
165-
.with(retry_delay: 5)
166-
.with(user: 'root')
167-
end
168-
end
169-
end
170-
end
171-
end

cookbooks/aws-parallelcluster-platform/files/enroot/enroot.template.conf renamed to cookbooks/aws-parallelcluster-platform/templates/enroot/enroot.conf.erb

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
#ENROOT_LIBRARY_PATH /usr/lib/enroot
22
#ENROOT_SYSCONF_PATH /etc/enroot
3-
ENROOT_RUNTIME_PATH /tmp/enroot/user-$(id -u)
4-
ENROOT_CONFIG_PATH ${ENROOT_CONFIG_PATH}
5-
ENROOT_CACHE_PATH ${ENROOT_CACHE_PATH}
6-
ENROOT_DATA_PATH /tmp/enroot/data/user-$(id -u)
3+
ENROOT_RUNTIME_PATH <%= node['cluster']['enroot']['local_dir'] %>/runtime/user-$(id -u)
4+
ENROOT_CONFIG_PATH <%= node['cluster']['enroot']['local_dir'] %>/config/user-$(id -u)
5+
ENROOT_CACHE_PATH <%= node['cluster']['enroot']['local_dir'] %>/cache/group-$(id -g)
6+
ENROOT_DATA_PATH <%= node['cluster']['enroot']['local_dir'] %>/data/user-$(id -u)
77
#ENROOT_TEMP_PATH ${TMPDIR:-/tmp}
88

99
# Gzip program used to uncompress digest layers.
@@ -68,4 +68,4 @@ ENROOT_RESTRICT_DEV no
6868
#all_proxy
6969
#no_proxy
7070
#http_proxy
71-
#https_proxy
71+
#https_proxy

cookbooks/aws-parallelcluster-platform/test/controls/enroot_spec.rb

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,16 +14,37 @@
1414

1515
expected_enroot_version = node['cluster']['enroot']['version']
1616

17-
describe "gdrcopy version is expected to be #{expected_enroot_version}" do
17+
describe "enroot version is expected to be #{expected_enroot_version}" do
1818
subject { command('enroot version').stdout.strip() }
1919
it { should eq expected_enroot_version }
2020
end
21+
22+
base_dir1 = "/etc/enroot"
23+
etc_dirs = [ base_dir1, "#{base_dir1}/enroot-cache"]
24+
25+
etc_dirs.each do |path|
26+
describe directory(path) do
27+
it { should exist }
28+
its('mode') { should cmp '01777' }
29+
its('owner') { should eq 'root' }
30+
its('group') { should eq 'root' }
31+
end
32+
end
33+
34+
base_dir2 = "/run/enroot"
35+
tmp_dirs = [ base_dir2, "#{base_dir2}/data" ]
36+
tmp_dirs.each do |path|
37+
describe directory(path) do
38+
it { should exist }
39+
its('mode') { should cmp '01777' }
40+
end
41+
end
2142
end
2243

2344
control 'tag:config_enroot_enabled_on_graphic_instances' do
2445
only_if { !os_properties.on_docker? && ['yes', true].include?(node['cluster']['nvidia']['enabled']) }
2546

26-
describe file("/opt/parallelcluster/shared/enroot") do
47+
describe file("/etc/enroot/enroot-cache") do
2748
it { should exist }
2849
its('group') { should eq 'root' }
2950
end unless os_properties.redhat_on_docker?

cookbooks/aws-parallelcluster-shared/attributes/cluster.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
default['cluster']['license_dir'] = "#{node['cluster']['base_dir']}/licenses"
55
default['cluster']['configs_dir'] = "#{node['cluster']['base_dir']}/configs"
66
default['cluster']['shared_dir'] = "#{node['cluster']['base_dir']}/shared"
7+
default['cluster']['examples_dir'] = "#{node['cluster']['base_dir']}/examples"
78
default['cluster']['shared_dir_login_nodes'] = "#{node['cluster']['base_dir']}/shared_login_nodes"
89
default['cluster']['log_base_dir'] = '/var/log/parallelcluster'
910
default['cluster']['etc_dir'] = '/etc/parallelcluster'

cookbooks/aws-parallelcluster-slurm/attributes/slurm_attributes.rb

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,10 @@
1818

1919
# Slurmdbd
2020
default['cluster']['slurmdbd_service_enabled'] = "true"
21+
22+
# Spank
23+
default['cluster']['slurm']['spank_config_dir'] = "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d"
24+
25+
# Pyxis
26+
default['cluster']['pyxis']['version'] = '0.20.0'
27+
default['cluster']['pyxis']['runtime_path'] = '/run/pyxis'

cookbooks/aws-parallelcluster-slurm/recipes/install/install_pyxis.rb

Lines changed: 46 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
# Cookbook:: aws-parallelcluster-slurm
55
# Recipe:: install_pyxis
66
#
7-
# Copyright:: Amazon.com, Inc. or its affiliates. All Rights Reserved.
7+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
88
#
99
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
1010
# License. A copy of the License is located at
@@ -21,6 +21,9 @@
2121
pyxis_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/pyxis/v#{pyxis_version}.tar.gz"
2222
pyxis_tarball = "#{node['cluster']['sources_dir']}/pyxis-#{pyxis_version}.tar.gz"
2323

24+
spank_examples_dir = "#{node['cluster']['examples_dir']}/spank"
25+
pyxis_examples_dir = "#{node['cluster']['examples_dir']}/pyxis"
26+
2427
remote_file pyxis_tarball do
2528
source pyxis_url
2629
mode '0644'
@@ -35,12 +38,49 @@
3538
set -e
3639
tar xf #{pyxis_tarball} -C /tmp
3740
cd /tmp/pyxis-#{pyxis_version}
38-
CPPFLAGS='-I /opt/slurm/include/' make
39-
CPPFLAGS='-I /opt/slurm/include/' make install
40-
mkdir -p /opt/slurm/etc/plugstack.conf.d
41-
echo -e 'include /opt/slurm/etc/plugstack.conf.d/*' | tee /opt/slurm/etc/plugstack.conf
42-
ln -fs /usr/local/share/pyxis/pyxis.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf
41+
CPPFLAGS='-I #{node['cluster']['slurm']['install_dir']}/include/' make
42+
CPPFLAGS='-I #{node['cluster']['slurm']['install_dir']}/include/' make install
4343
PYXIS_INSTALL
4444
retries 3
4545
retry_delay 5
4646
end
47+
48+
# Spank configurations
49+
50+
directory node['cluster']['slurm']['spank_config_dir'] do
51+
user 'root'
52+
group 'root'
53+
mode '0755'
54+
recursive true
55+
end
56+
57+
directory spank_examples_dir
58+
59+
template "#{spank_examples_dir}/plugstack.conf" do
60+
source 'pyxis/plugstack.conf.erb'
61+
owner 'root'
62+
group 'root'
63+
mode '0644'
64+
end
65+
66+
# Pyxis configurations
67+
68+
directory node['cluster']['pyxis']['runtime_path'] do
69+
user 'root'
70+
group 'root'
71+
mode '0777'
72+
recursive true
73+
end
74+
75+
link '/usr/local/share/pyxis/pyxis.conf' do
76+
to "#{node['cluster']['slurm']['spank_config_dir']}/pyxis.conf"
77+
end
78+
79+
directory pyxis_examples_dir
80+
81+
template "#{pyxis_examples_dir}/pyxis.conf" do
82+
source 'pyxis/pyxis.conf.erb'
83+
owner 'root'
84+
group 'root'
85+
mode '0644'
86+
end

0 commit comments

Comments
 (0)