Skip to content

Commit 45375f9

Browse files
Reconfigure all clusters if Slurm has topo issues
1 parent fa869b5 commit 45375f9

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

bin/slurm_config.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,18 @@ then
1919
sudo /usr/sbin/slurmctld -c
2020
fi
2121
ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook $playbooks_path/slurm_config.yml
22+
if [[ ${@: -1} == "--INITIAL" || ${@: -1} == "--initial" || ${@: -1} == "-INITIAL" || ${@: -1} == "-initial" ]]
23+
then
24+
for inventory in /opt/oci-hpc/autoscaling/clusters/*/inventory ;
25+
do
26+
if [ -f $(dirname $inventory)/currently* ]
27+
then
28+
echo "Cluster is not in running state"
29+
else
30+
ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook $playbooks_path/slurm_config_as.yml -i $inventory
31+
fi
32+
done
33+
fi
2234
else
2335
echo "There are some duplicates instance_keyword lines, please make them unique"
2436
fi

playbooks/slurm_config_as.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- hosts: compute
2+
vars:
3+
destroy: false
4+
initial: false
5+
download_path: "{{ '/nfs/cluster/' if cluster_nfs|bool else '/tmp' }}"
6+
enroot_top_path: "{{ nvme_path }}/enroot/"
7+
vars_files:
8+
- "/opt/oci-hpc/conf/queues.conf"
9+
tasks:
10+
- include_role:
11+
name: slurm
12+
when: slurm|default(false)|bool

0 commit comments

Comments
 (0)