Skip to content

Commit f8839fe

Browse files
authored
Merge branch 'main' into fix/templating2
2 parents 5d53dd6 + 531b3b9 commit f8839fe

File tree

45 files changed

+345
-103
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+345
-103
lines changed

.github/workflows/stackhpc.yml

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
cloud:
1515
- "arcus" # Arcus OpenStack in rcp-cloud-portal-demo project, with RoCE
1616
fail-fast: false # as want clouds to continue independently
17-
concurrency: ${{ matrix.cloud }}
17+
concurrency: ${{ github.ref }} # concurrency group keyed on the ref, i.e. scoped to the branch/PR
1818
runs-on: ubuntu-20.04
1919
steps:
2020
- uses: actions/checkout@v2
@@ -52,30 +52,52 @@ jobs:
5252
smslabs_CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}
5353
arcus_CLOUDS_YAML: ${{ secrets.ARCUS_CLOUDS_YAML }}
5454

55-
- name: Provision infrastructure
56-
id: provision
55+
- name: Provision ports, inventory and other infrastructure apart from nodes
56+
id: provision_ports
5757
run: |
5858
. venv/bin/activate
5959
. environments/${{ matrix.cloud }}/activate
6060
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
61-
terraform apply -auto-approve
61+
TF_VAR_create_nodes=false terraform apply -auto-approve
6262
env:
6363
OS_CLOUD: openstack
6464
TF_VAR_cluster_name: ci${{ github.run_id }}
6565

66+
- name: Setup environment-specific inventory/terraform inputs
67+
run: |
68+
. venv/bin/activate
69+
. environments/${{ matrix.cloud }}/activate
70+
ansible-playbook ansible/adhoc/generate-passwords.yml
71+
echo vault_testuser_password: "$TESTUSER_PASSWORD" > $APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/all/test_user.yml
72+
ansible-playbook ansible/adhoc/template-cloud-init.yml
73+
env:
74+
ANSIBLE_FORCE_COLOR: True
75+
TESTUSER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
76+
77+
- name: Provision servers
78+
id: provision_servers
79+
run: |
80+
. venv/bin/activate
81+
. environments/${{ matrix.cloud }}/activate
82+
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
83+
terraform apply -auto-approve
84+
env:
85+
OS_CLOUD: openstack
86+
TF_VAR_cluster_name: ci${{ github.run_id }}
87+
6688
- name: Get server provisioning failure messages
6789
id: provision_failure
6890
run: |
6991
. venv/bin/activate
7092
. environments/${{ matrix.cloud }}/activate
7193
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
7294
TF_FAIL_MSGS="$(../../skeleton/\{\{cookiecutter.environment\}\}/terraform/getfaults.py $PWD)"
73-
echo $TF_FAIL_MSGS
95+
echo TF failure messages: $TF_FAIL_MSGS
7496
echo "::set-output name=messages::${TF_FAIL_MSGS}"
7597
env:
7698
OS_CLOUD: openstack
7799
TF_VAR_cluster_name: ci${{ github.run_id }}
78-
if: always() && steps.provision.outcome == 'failure'
100+
if: always() && steps.provision_servers.outcome == 'failure'
79101

80102
- name: Delete infrastructure if failed due to lack of hosts
81103
run: |
@@ -86,20 +108,17 @@ jobs:
86108
env:
87109
OS_CLOUD: openstack
88110
TF_VAR_cluster_name: ci${{ github.run_id }}
89-
if: ${{ always() && steps.provision.outcome == 'failure' && contains('not enough hosts available', steps.provision_failure.messages) }}
111+
if: ${{ always() && steps.provision_servers.outcome == 'failure' && contains(steps.provision_failure.messages, 'not enough hosts available') }}
90112

91113
- name: Directly configure cluster
92114
run: |
93115
. venv/bin/activate
94116
. environments/${{ matrix.cloud }}/activate
95117
ansible all -m wait_for_connection
96-
ansible-playbook ansible/adhoc/generate-passwords.yml
97-
echo test_user_password: "$TEST_USER_PASSWORD" > $APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/basic_users/defaults.yml
98118
ansible-playbook -vv ansible/site.yml
99119
env:
100120
OS_CLOUD: openstack
101121
ANSIBLE_FORCE_COLOR: True
102-
TEST_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
103122

104123
- name: Run MPI-based tests
105124
run: |
@@ -135,23 +154,24 @@ jobs:
135154
--server-response \
136155
--no-check-certificate \
137156
--http-user=testuser \
138-
--http-password=${TEST_USER_PASSWORD} https://${openondemand_servername} \
157+
--http-password=${TESTUSER_PASSWORD} https://${openondemand_servername} \
139158
2>&1)
140159
(echo $statuscode | grep "200 OK") || (echo $statuscode && exit 1)
141160
env:
142-
TEST_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
161+
TESTUSER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
143162

144163
- name: Build packer images
145164
run: |
146165
. venv/bin/activate
147166
. environments/${{ matrix.cloud }}/activate
148-
echo test_user_password: "$TEST_USER_PASSWORD" > $APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/basic_users/defaults.yml
167+
ansible-playbook ansible/adhoc/generate-passwords.yml
168+
echo vault_testuser_password: "$TESTUSER_PASSWORD" > $APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/all/test_user.yml
149169
cd packer/
150170
PACKER_LOG=1 packer build -on-error=ask -var-file=$PKR_VAR_environment_root/builder.pkrvars.hcl openstack.pkr.hcl
151171
env:
152172
OS_CLOUD: openstack
153173
ANSIBLE_FORCE_COLOR: True
154-
TEST_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
174+
TESTUSER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
155175

156176
- name: Test reimage of nodes
157177
run: |

ansible/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ roles/*
2626
!roles/slurm_exporter/**
2727
!roles/firewalld/
2828
!roles/firewalld/**
29+
!roles/etc_hosts/
30+
!roles/etc_hosts/**
31+
!roles/cloud_init/
32+
!roles/cloud_init/**
2933
!roles/mysql/
3034
!roles/mysql/**
3135
!roles/systemd/

ansible/adhoc/hpctests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
---
66

7-
- hosts: hpctests[0] # TODO: might want to make which node is used selectable?
7+
- hosts: login[0] # TODO: might want to make which node is used selectable?
88
become: false
99
gather_facts: false
1010
tasks:
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
- hosts: cloud_init
2+
become: no
3+
gather_facts: no
4+
tasks:
5+
- name: Template out cloud-init userdata
6+
import_role:
7+
name: cloud_init
8+
tasks_from: template.yml
9+
delegate_to: localhost

ansible/bootstrap.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,19 @@
1313
to update these variable names. ** NB: The actual secrets will not be changed.**
1414
when: "'secrets_openhpc_' in (hostvars[inventory_hostname] | join)"
1515

16+
- hosts: etc_hosts
17+
gather_facts: false
18+
tags: etc_hosts
19+
become: yes
20+
tasks:
21+
- name: Template /etc/hosts
22+
copy:
23+
content: "{{ etc_hosts_template }}"
24+
dest: /etc/hosts
25+
owner: root
26+
group: root
27+
mode: u=rw,og=r
28+
1629
- hosts: cluster
1730
gather_facts: false
1831
tasks:
@@ -74,6 +87,20 @@
7487
- import_role:
7588
name: fail2ban
7689

90+
- name: Setup podman
91+
hosts: podman
92+
tags: podman
93+
tasks:
94+
- import_role:
95+
name: podman
96+
tasks_from: prereqs.yml
97+
tags: prereqs
98+
99+
- import_role:
100+
name: podman
101+
tasks_from: config.yml
102+
tags: config
103+
77104
- hosts: update
78105
gather_facts: false
79106
become: yes

ansible/ci/test_reimage.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@
4545
gather_facts: no
4646
tags: reimage_compute
4747
tasks:
48-
# TODO: This is specific to smslabs/arcus environment config - could generalise to all compute nodes
48+
# TODO: This is specific to arcus environment config - could generalise to all compute nodes
4949
- name: Request compute node rebuild via Slurm
5050
shell:
51-
cmd: scontrol reboot ASAP nextstate=RESUME reason='rebuild image:{{ compute_build.artifact_id }}' {{ openhpc_cluster_name }}-compute-[0-1]
51+
cmd: scontrol reboot ASAP nextstate=RESUME reason='rebuild image:{{ compute_build.artifact_id }}' {{ openhpc_cluster_name }}-compute-[0-3]
5252
become: yes
5353

5454
- name: Check compute node rebuild completed

ansible/monitoring.yml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,6 @@
11
# ---
22
# # NOTE: Requires slurmdbd
33

4-
- name: Setup podman
5-
hosts: podman
6-
tags: podman
7-
tasks:
8-
- import_role:
9-
name: podman
10-
tasks_from: prereqs.yml
11-
tags: prereqs
12-
13-
- import_role:
14-
name: podman
15-
tasks_from: config.yml
16-
tags: config
17-
184
- name: Setup elasticsearch
195
hosts: opendistro
206
tags: opendistro

ansible/roles/cloud_init/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# cloud_init
2+
3+
Create cloud init userdata for instance groups.
4+
5+
# Requirements
6+
Image and cloud environment supporting cloud-init.
7+
8+
# Role Variables
9+
10+
- `cloud_init_output_path`: Required. Path to output userdata files to.
11+
- `cloud_init_userdata_templates`: Optional list. Each element is a dict with keys/values as follows:
12+
- `module`: Required str. Name of cloud_init [module](https://cloudinit.readthedocs.io/en/latest/topics/modules.html)
13+
- `group`: Optional str. Name of inventory group to which this config applies - if no group is specified then it applies to all groups. This allows defining `cloud_init_userdata_templates` for group `all`.
14+
- `template`: Jinja template for cloud_init module [configuration](https://cloudinit.readthedocs.io/en/latest/topics/modules.html).
15+
16+
Elements may repeat `module`; the resulting userdata cloud-config file will contain configuration from all applicable (by group) elements for that module.
17+
18+
Note that the appliance [constructs](../../../environments/common/inventory/group_vars/all/cloud_init.yml) `cloud_init_userdata_templates` from `cloud_init_userdata_templates_default` and `cloud_init_userdata_templates_extra` to
19+
allow easier customisation in specific environments.
20+
21+
# Dependencies
22+
None.
23+
24+
# Example Playbook
25+
See `ansible/adhoc/rebuild.yml`.
26+
27+
# License
28+
Apache 2.0
29+
30+
# Author Information
31+
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#cloud_init_output_path:
2+
cloud_init_userdata_templates: []
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
- name: Template out cloud-init userdata
3+
ansible.builtin.template:
4+
src: userdata.yml.j2
5+
dest: "{{ cloud_init_output_path }}/{{ inventory_hostname }}.userdata.yml"

0 commit comments

Comments
 (0)