Skip to content

Commit c3a03fd

Browse files
authored
Merge branch 'main' into eessi
2 parents ad2ff81 + 0713c64 commit c3a03fd

File tree

22 files changed

+358
-199
lines changed

22 files changed

+358
-199
lines changed

.github/workflows/fatimage.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ jobs:
3636
build:
3737
- image_name: openhpc-RL8
3838
source_image_name: Rocky-8-GenericCloud-Base-8.10-20240528.0.x86_64.raw
39-
inventory_groups: control,compute,login,update
39+
inventory_groups: fatimage
4040
- image_name: openhpc-RL9
4141
source_image_name: Rocky-9-GenericCloud-Base-9.6-20250531.0.x86_64.qcow2
42-
inventory_groups: control,compute,login,update
42+
inventory_groups: fatimage
4343
env:
4444
ANSIBLE_FORCE_COLOR: True
4545
OS_CLOUD: openstack

.github/workflows/nightly-cleanup.yml

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -46,53 +46,20 @@ jobs:
4646
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
4747
shell: bash
4848

49-
- name: Find CI clusters
49+
- name: Delete all CI clusters
5050
run: |
5151
. venv/bin/activate
52-
CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq || true)
53-
echo "DEBUG: Raw CI clusters: $CI_CLUSTERS"
54-
55-
if [[ -z "$CI_CLUSTERS" ]]; then
56-
echo "No matching CI clusters found."
57-
else
58-
# Flatten multiline value so can be passed as env var
59-
CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//')
60-
echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED"
61-
echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> "$GITHUB_ENV"
62-
fi
52+
./dev/delete-cluster.py slurmci-RL --force
6353
shell: bash
64-
65-
- name: Delete CI clusters
54+
55+
- name: Delete all CI extra build VMs and volumes
6656
run: |
6757
. venv/bin/activate
68-
if [[ -z ${ci_clusters} ]]; then
69-
echo "No clusters to delete."
70-
exit 0
71-
fi
72-
73-
for cluster_prefix in ${ci_clusters}
74-
do
75-
echo "Processing cluster: $cluster_prefix"
76-
77-
# Get all servers with the matching name for control node
78-
CONTROL_SERVERS=$(openstack server list --name "${cluster_prefix}-control" --format json)
79-
80-
# Get unique server names to avoid duplicate cleanup
81-
UNIQUE_NAMES=$(echo "$CONTROL_SERVERS" | jq -r '.[].Name' | sort | uniq)
82-
for name in $UNIQUE_NAMES; do
83-
echo "Deleting cluster with control node: $name"
84-
85-
# Get the first matching server ID by name
86-
server=$(echo "$CONTROL_SERVERS" | jq -r '.[] | select(.Name=="'"$name"'") | .ID' | head -n1)
87-
88-
# Make sure server still exists (wasn't deleted earlier)
89-
if ! openstack server show "$server" &>/dev/null; then
90-
echo "Server $server no longer exists, skipping $name."
91-
continue
92-
fi
58+
./dev/delete-cluster.py openhpc-extra-RL --force
59+
shell: bash
9360

94-
echo "Deleting cluster $cluster_prefix (server $server)..."
95-
./dev/delete-cluster.py "$cluster_prefix" --force
96-
done
97-
done
61+
- name: Delete all fatimage build VMs and volumes
62+
run: |
63+
. venv/bin/activate
64+
./dev/delete-cluster.py openhpc-RL --force
9865
shell: bash

ansible/adhoc/sync-pulp.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,5 @@
55
name: pulp_site
66
tasks_from: sync.yml
77
vars:
8-
pulp_site_target_arch: "x86_64"
9-
pulp_site_target_distribution: "rocky"
108
# default distribution to *latest* specified for baseos repo:
119
pulp_site_target_distribution_version: "{{ dnf_repos_repos['baseos'].keys() | map('float') | sort | last }}"

ansible/fatimage.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@
117117
- name: Install OpenHPC
118118
ansible.builtin.import_role:
119119
name: stackhpc.openhpc
120-
tasks_from: install.yml
120+
tasks_from: install-ohpc.yml
121121
when: "'openhpc' in group_names"
122122

123123
# - import_playbook: portal.yml

ansible/roles/nhc/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ When the `ansible/site.yml` playbook is run this will automatically:
3737

3838
- Filesystem mounts
3939
- Ethernet interfaces
40+
- InfiniBand interfaces
4041

4142
See `/etc/nhc/nhc.conf` on a compute node for the full configuration.
4243

ansible/roles/nhc/templates/nhc.conf.j2

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,10 @@
1616
{{ ansible_fqdn }} || check_hw_eth {{ iface }}
1717
{% endfor %}
1818

19+
## InfiniBand interface checks
20+
{# Emit one check_hw_ib line per interface whose name matches 'ib'. NOTE(review): the /1000 presumably converts the fact's speed (Mb/s) to the Gb/s rate check_hw_ib expects - confirm. #}
{% for iface in ansible_interfaces | select('match', 'ib') %}
21+
{{ ansible_fqdn }} || check_hw_ib {{ (ansible_facts[iface]['speed'] / 1000) | int }} {{ iface }}
22+
{% endfor %}
23+
1924
## Site-specific checks
2025
{{ nhc_config_extra }}

ansible/roles/sshd/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ Configure sshd.
66

77
- `sshd_password_authentication`: Optional bool. Whether to enable password login. Default `false`.
88
- `sshd_disable_forwarding`: Optional bool. Whether to disable all forwarding features (X11, ssh-agent, TCP and StreamLocal). Default `true`.
9+
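- `sshd_allow_local_forwarding`: Optional bool. Whether to allow local TCP forwarding restricted to loopback destinations (`AllowTcpForwarding local` with `PermitOpen 127.0.0.1:*`), as needed by the Visual Studio Code Remote - SSH extension. Only takes effect when `sshd_disable_forwarding` is `false`. Note: enabling this may be insecure. Default `false`.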
910
- `sshd_conf_src`: Optional string. Path to sshd configuration template. Default is in-role template.
1011
- `sshd_conf_dest`: Optional string. Path to destination for sshd configuration file. Default is `/etc/ssh/sshd_config.d/10-ansible.conf` which overrides `50-{cloud-init,redhat}` files, if present.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
sshd_password_authentication: false
33
sshd_disable_forwarding: true
4+
sshd_allow_local_forwarding: false
45
sshd_conf_src: sshd.conf.j2
56
sshd_conf_dest: /etc/ssh/sshd_config.d/10-ansible.conf
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
# {{ ansible_managed }}
22
PasswordAuthentication {{ 'yes' if sshd_password_authentication | bool else 'no' }}
33
DisableForwarding {{ 'yes' if sshd_disable_forwarding | bool else 'no' }}
4+
{# Coerce with `| bool`, as the options above do, so string values such as "false" (e.g. from extra-vars) do not enable forwarding. #}
{% if sshd_allow_local_forwarding | bool %}
5+
AllowTcpForwarding local
6+
PermitOpen 127.0.0.1:*
7+
{% endif %}

ansible/roles/zenith_proxy/tasks/main.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
group: "{{ zenith_proxy_podman_user }}"
6262
mode: "0755"
6363
become: true
64+
delegate_to: "{{ groups['control'] | first }}"
65+
run_once: true
6466

6567
- name: Initialise Zenith client
6668
# Use a foreground command rather than the podman_container module as I could not

0 commit comments

Comments
 (0)