Skip to content

Commit 962d512

Browse files
authored
Merge branch 'main' into ci/cleanup-previous
2 parents 3147447 + 1f3b7ad commit 962d512

File tree

13 files changed

+111
-99
lines changed

13 files changed

+111
-99
lines changed

ansible/roles/cacerts/tasks/export.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
ansible.builtin.copy:
44
src: "{{ item }}"
55
dest: /exports/cluster/cacerts/
6-
owner: slurm
7-
group: root
8-
mode: "0644"
6+
owner: ansible-init
7+
group: ansible-init
8+
mode: u=rw,go=
99
with_fileglob:
1010
- "{{ cacerts_cert_dir }}/*"
1111
delegate_to: "{{ groups['control'] | first }}"

ansible/roles/compute_init/files/compute-init.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
- ansible.builtin.meta: end_play
8484
- name: Check if hostvars exist
8585
become: true
86-
become_user: slurm
86+
become_user: ansible-init # share is root-squashed
8787
ansible.builtin.stat:
8888
path: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml"
8989
register: hostvars_stat
@@ -98,7 +98,7 @@
9898
- ansible.builtin.meta: end_play
9999
- name: Sync /mnt/cluster to /var/tmp
100100
become: true
101-
become_user: slurm
101+
become_user: ansible-init # share is root-squashed
102102
ansible.posix.synchronize:
103103
src: "/mnt/cluster/"
104104
dest: "/var/tmp/cluster/"

ansible/roles/compute_init/tasks/export.yml

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,47 @@
11
---
2-
- name: Ensure the /exports/cluster directory exists
2+
- name: Ensure /exports/cluster directory structure exists
33
ansible.builtin.file:
4-
path: /exports/cluster
4+
path: "{{ item }}"
55
state: directory
6-
owner: slurm
7-
group: root
6+
owner: ansible-init
7+
group: ansible-init
88
mode: u=rX,g=rwX,o=
99
run_once: true
10+
loop:
11+
- /exports/cluster
12+
- /exports/cluster/hostvars
13+
- /exports/cluster/cacerts
14+
- /exports/cluster/cvmfs
15+
- /exports/cluster/hostconfig
1016
delegate_to: "{{ groups['control'] | first }}"
1117

1218
- name: Copy /etc/hosts to /exports/cluster
1319
ansible.builtin.copy:
1420
src: /etc/hosts
1521
dest: /exports/cluster/hosts
16-
owner: slurm
17-
group: root
18-
mode: u=r,g=rw,o=
22+
owner: ansible-init
23+
group: ansible-init
24+
mode: u=rw,go=r
1925
remote_src: true
2026
run_once: true
2127
delegate_to: "{{ groups['control'] | first }}"
2228

23-
- name: Create hostvars directory
29+
- name: Create per-host hostvars directory
2430
ansible.builtin.file:
2531
path: /exports/cluster/hostvars/{{ inventory_hostname }}/
2632
state: directory
27-
owner: slurm
28-
group: root
29-
mode: u=rX,g=rwX,o=
33+
owner: ansible-init
34+
group: ansible-init
35+
mode: u=rwX,go=
3036
delegate_to: "{{ groups['control'] | first }}"
3137

3238
- name: Template out hostvars
3339
ansible.builtin.template:
3440
src: hostvars.yml.j2
3541
dest: /exports/cluster/hostvars/{{ inventory_hostname }}/hostvars.yml
36-
owner: slurm
37-
group: root
38-
mode: u=r,g=rw,o=
42+
owner: ansible-init
43+
group: ansible-init
44+
mode: u=rw,go=
3945
delegate_to: "{{ groups['control'] | first }}"
4046

4147
- name: Copy manila share info to /exports/cluster
@@ -52,29 +58,19 @@
5258
os_manila_mount_share_info_var:
5359
os_manila_mount_share_info: "{{ os_manila_mount_share_info }}"
5460

55-
- name: Ensure /exports/cluster/cvmfs directory exists
56-
ansible.builtin.file:
57-
path: /exports/cluster/cvmfs
58-
state: directory
59-
owner: slurm
60-
group: root
61-
mode: "0755"
62-
run_once: true
63-
delegate_to: "{{ groups['control'] | first }}"
64-
6561
- name: Export cacerts
6662
ansible.builtin.include_role:
6763
name: cacerts
6864
tasks_from: export.yml
6965
when: "'cacerts' in group_names"
7066

71-
- name: Create hostconfig directory
67+
- name: Create per-host hostconfig directory
7268
ansible.builtin.file:
7369
path: "/exports/cluster/hostconfig/{{ inventory_hostname }}/"
7470
state: directory
75-
owner: slurm
76-
group: root
77-
mode: u=rX,g=rwX,o=
71+
owner: ansible-init
72+
group: ansible-init
73+
mode: u=rwX,go=
7874
delegate_to: "{{ groups['control'] | first }}"
7975

8076
- name: Template sssd config

ansible/roles/cuda/defaults/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{
44
cuda_nvidia_driver_stream: '580-open'
55
cuda_nvidia_driver_version: '580.105.08-1'
66
cuda_nvidia_driver_pkg: "nvidia-open-3:{{ cuda_nvidia_driver_version }}.el{{ ansible_distribution_major_version }}"
7-
cuda_package_version: '13.0.2-1'
7+
cuda_package_version: '13.1.0-1'
88
cuda_version_short: "{{ (cuda_package_version | split('.'))[0:2] | join('.') }}" # major.minor
99
cuda_packages_default:
1010
- "cuda-toolkit-{{ cuda_package_version }}"

ansible/roles/nhc/tasks/export.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@
33
ansible.builtin.template:
44
src: "{{ nhc_config_template }}"
55
dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/nhc.conf"
6-
mode: "0644"
6+
owner: ansible-init
7+
group: ansible-init
8+
mode: u=rw,go=
79
delegate_to: "{{ groups['control'] | first }}"

ansible/roles/sssd/tasks/export.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
ansible.builtin.template:
55
src: "{{ sssd_conf_src }}"
66
dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/sssd.conf"
7-
owner: root
8-
group: root
7+
owner: ansible-init
8+
group: ansible-init
99
mode: u=rw,go=
1010
delegate_to: "{{ groups['control'] | first }}"

dev/ansible-ssh

Lines changed: 43 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,64 @@
11
#!/usr/bin/env python3
2+
"""
3+
SSH to a cluster host using connection properties from Ansible inventory.
24
3-
# This tool allows you to ssh into a host using the ansible inventory.
4-
# Example: ansible-ssh compute[0] -o GlobalKnownHostsFile=/dev/null -o
5-
# UserKnownHostsFile=/dev/null
5+
Usage:
6+
ansible-ssh [-n] PATTERN
7+
8+
where PATTERN can be any of:
9+
- host e.g. mycluster-login-0
10+
- group e.g. login
11+
- group[index] e.g. compute[0]
12+
options:
13+
-n Disable strict host key checking and do not store accepted keys
14+
"""
615

716
import json
817
import os
918
import shlex
1019
import subprocess
1120
import sys
12-
from collections import defaultdict
13-
14-
15-
def _optional_arg(prototype, *values):
16-
# returns empty string if any of the values are falsey
17-
filtered = [value for value in values if value]
18-
return prototype.format(*values) if len(values) == len(filtered) else ""
1921

22+
NO_HOSTKEY_CHECK_OPTS = [
23+
'-o', 'StrictHostKeyChecking=no',
24+
'-o', 'GlobalKnownHostsFile=/dev/null',
25+
'-o', 'UserKnownHostsFile=/dev/null'
26+
]
2027

2128
if __name__ == "__main__":
22-
if len(sys.argv) < 2:
23-
msg = (
24-
f"Usage: {sys.argv[0]} <inventory_hostname> [args to pass to ssh]")
25-
print(msg, file=sys.stderr)
29+
no_known_hosts = '-n' in sys.argv
30+
if sys.argv[-1] in (sys.argv[0], '-n'):
31+
print(__doc__, file=sys.stderr)
2632
sys.exit(-1)
2733

28-
# Quote to prevent shell injection
29-
host = shlex.quote(sys.argv[1])
34+
pattern = sys.argv[-1]
3035

36+
template_str = ("ssh "
37+
"{% if ansible_ssh_port | default(false) %}-p {{ ansible_ssh_port }} {% endif %}"
38+
"{% if ansible_ssh_private_key_file | default(false) %} -i {{ ansible_ssh_private_key_file }} {% endif %}"
39+
"{{ ansible_ssh_common_args | default('') }} "
40+
"{{ ansible_user }}@{{ ansible_host }}")
41+
module_args = json.dumps({'msg':template_str})
42+
ansible_cmd = ['ansible', pattern, '-o', '-m', 'debug', '-a', module_args]
3143
try:
32-
output = subprocess.check_output(
33-
f'ansible-inventory --host {host}', shell=True)
34-
except (subprocess.CalledProcessError) as e:
35-
msg = (f"[ERROR]: Is {host} missing from the inventory?")
44+
output = subprocess.check_output(ansible_cmd, text=True)
45+
except (subprocess.CalledProcessError):
46+
msg = ("[ERROR]: ansible exited in error")
3647
print(msg, file=sys.stderr)
3748
sys.exit(-1)
49+
# output looks like e.g.
50+
# stg-login-0 | SUCCESS => { "changed": false, ...
51+
# one line per host
52+
if not output:
53+
sys.exit(1)
54+
result = output.splitlines()[0].split('>', 1)[-1]
55+
expanded = json.loads(result)['msg']
56+
# can assume ansible_host exists b/c defined by terraform
57+
# can assume ansible_user exists b/c defined in common inventory
3858

39-
meta = defaultdict(str, json.loads(output))
40-
41-
ansible_ssh_host = meta['ansible_ssh_host'] or meta['ansible_host']
42-
ansible_ssh_user = meta['ansible_ssh_user'] or meta['ansible_user']
43-
ansible_ssh_port = meta['ansible_ssh_port']
44-
ansible_ssh_private_key_file = meta['ansible_ssh_private_key_file']
45-
46-
port = _optional_arg("-p {}", ansible_ssh_port)
47-
identity = _optional_arg("-i {}", ansible_ssh_private_key_file)
48-
host = _optional_arg("{}@{}", ansible_ssh_user, ansible_ssh_host)
49-
opts = meta['ansible_ssh_common_args']
50-
51-
# Handle case where user is not set
52-
if not host:
53-
host = ansible_ssh_host
54-
55-
if not host:
56-
# if we get here, "ansible_ssh_host" is not set.
57-
msg = f"Could not determine the host"
58-
print(msg, file=sys.stderr)
59-
sys.exit(-1)
59+
cmd = shlex.split(expanded)
60+
if no_known_hosts:
61+
cmd = cmd[0:1] + NO_HOSTKEY_CHECK_OPTS + cmd[1:]
6062

61-
base = shlex.split(f'ssh {port} {identity} {opts}')
62-
extras = sys.argv[2:]
63-
cmd = base + extras + [host]
6463
print(f"[INFO]: Running: {subprocess.list2cmdline(cmd)}")
6564
os.execvp(cmd[0], cmd)

dev/image-set-properties.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ openstack image set \
1717
--property hw_architecture=x86_64 \
1818
--property hw_vif_multiqueue_enabled=true \
1919
--property hw_firmware_type=uefi \
20-
--property os_distro=rocky \
2120
--property os_type=linux \
2221
--property os_admin_user=rocky \
2322
"$image"

environments/.caas/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ Non-standard things for this environment:
1212
azimuth_caas_stackhpc_slurm_appliance_template:
1313
...
1414
envVars:
15-
ANSIBLE_INVENTORY: environments/common/inventory,environments/.caas/inventory
15+
ANSIBLE_INVENTORY: environments/common/inventory,environments/site/inventory,environments/.caas/inventory
16+
17+
([Source](https://github.com/azimuth-cloud/ansible-collection-azimuth-ops/blob/main/roles/azimuth_caas_operator/defaults/main.yml#L199))
1618

1719
Ansible then defines `ansible_inventory_sources` which contains absolute paths, and
1820
that is used to derive the `appliances_environment_root` and
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"cluster_image": {
3-
"RL8": "openhpc-RL8-251211-0951-51b93e3f",
4-
"RL9": "openhpc-RL9-251211-0951-51b93e3f"
3+
"RL8": "openhpc-RL8-251213-1133-31273766",
4+
"RL9": "openhpc-RL9-251213-1133-31273766"
55
}
66
}

0 commit comments

Comments
 (0)