Skip to content

Commit 83d232b

Browse files
committed
Merge branch 'main' into feat/eess-stratum-1
2 parents 19f37b2 + 8583a8f commit 83d232b

File tree

12 files changed

+59
-42
lines changed

12 files changed

+59
-42
lines changed

.github/workflows/fatimage.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
source_image_name: Rocky-8-GenericCloud-Base-8.10-20240528.0.x86_64.raw
3232
inventory_groups: control,compute,login,update
3333
- image_name: openhpc-RL9
34-
source_image_name: Rocky-9-GenericCloud-Base-9.5-20241118.0.x86_64.raw
34+
source_image_name: Rocky-9-GenericCloud-Base-9.6-20250531.0.x86_64.qcow2
3535
inventory_groups: control,compute,login,update
3636
env:
3737
ANSIBLE_FORCE_COLOR: True

ansible/fatimage.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,11 @@
190190
tasks_from: install.yml
191191
when: "'alertmanager' in group_names"
192192

193+
- name: Download HPL source
194+
include_role:
195+
name: hpctests
196+
tasks_from: source-hpl.yml
197+
193198
- hosts: prometheus
194199
become: yes
195200
gather_facts: yes

ansible/roles/hpctests/tasks/build-hpl.yml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,21 @@
55
path: "{{ hpctests_rootdir }}/hpl"
66
state: directory
77

8-
- name: Download HPL sources
8+
- name: Unarchive HPL sources from /opt/hpl
99
unarchive:
10-
src: "http://www.netlib.org/benchmark/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz"
11-
remote_src: yes
10+
src: "/opt/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz"
1211
dest: "{{ hpctests_rootdir }}/hpl"
12+
remote_src: yes
13+
owner: "{{ hpctests_user }}"
14+
group: "{{ hpctests_group }}"
15+
mode: '0755'
1316
keep_newer: yes
1417

15-
- name: Copy BLAS make file
16-
command:
17-
cmd: "cp setup/Make.Linux_PII_CBLAS Make.{{ hpctests_hpl_arch }}"
18-
chdir: "{{ hpctests_hpl_srcdir }}"
19-
creates: "{{ hpctests_hpl_srcdir }}/Make.{{ hpctests_hpl_arch }}"
18+
- name: Copy BLAS makefile
19+
copy:
20+
src: "{{ hpctests_hpl_srcdir }}/setup/Make.Linux_PII_CBLAS"
21+
dest: "{{ hpctests_hpl_srcdir }}/Make.{{ hpctests_hpl_arch }}"
22+
remote_src: yes
2023

2124
- name: Modify make file
2225
replace:
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
3+
- name: Make directory
4+
file:
5+
path: "/opt/hpl"
6+
state: directory
7+
owner: root
8+
group: root
9+
mode: '0755'
10+
11+
- name: Download HPL tarball
12+
get_url:
13+
url: "http://www.netlib.org/benchmark/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz"
14+
dest: "/opt/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz"
15+
owner: root
16+
group: root
17+
mode: '0644'

ansible/roles/lustre/README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,8 @@ The following variables control configuration of Lustre clients.
1919
- `lustre_mount_options`. Optional default mount options. Default values are systemd defaults from [Lustre client docs](http://wiki.lustre.org/Mounting_a_Lustre_File_System_on_Client_Nodes).
2020

2121
The following variables control the package build and install:
22-
- `lustre_version`: Optional str. Version of lustre to build, default `2.15.6/lu-18085`
23-
which is the first version with EL9.5 support, plus a fix for https://jira.whamcloud.com/browse/LU-18085.
24-
- `lustre_repo`: Optional str. URL for Lustre repo. Default is a StackHPC repo
25-
incorporating the above fix.
22+
- `lustre_version`: Optional str. Version of lustre to build, default `2.15.7`
23+
- `lustre_repo`: Optional str. URL for Lustre repo. Default is `git://git.whamcloud.com/fs/lustre-release.git`.
2624
- `lustre_build_packages`: Optional list. Prerequisite packages required to build Lustre. See `defaults/main.yml`.
2725
- `lustre_build_dir`: Optional str. Path to build lustre at, default `/tmp/lustre-release`.
2826
- `lustre_configure_opts`: Optional list. Options to `./configure` command. Default builds client rpms supporting Mellanox OFED, without support for GSS keys.

ansible/roles/lustre/defaults/main.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
lustre_repo: https://github.com/stackhpc/lustre-release.git
2-
lustre_version: '2.15.6/lu-18085' # Fixes https://jira.whamcloud.com/browse/LU-18085
1+
lustre_version: '2.15.7'
32
lustre_lnet_label: tcp
43
#lustre_mgs_nid:
54
lustre_mounts: []
65
lustre_mount_state: mounted
76
lustre_mount_options: 'defaults,_netdev,noauto,x-systemd.automount,x-systemd.requires=lnet.service,nosuid,nodev'
87

98
# below variables are for build and should not generally require changes
10-
lustre_git_repo: "git://git.whamcloud.com/fs/lustre-release.git"
9+
lustre_repo: "git://git.whamcloud.com/fs/lustre-release.git"
1110
lustre_build_packages:
1211
- "kernel-devel-{{ ansible_kernel }}"
1312
- git

ansible/validate.yml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,6 @@
6464
- import_role:
6565
name: stackhpc.openhpc
6666
tasks_from: validate.yml
67-
- assert:
68-
that: "'enable_configless' in openhpc_config.SlurmctldParameters | default([])"
69-
fail_msg: |
70-
'enable_configless' not found in openhpc_config.SlurmctldParameters - is variable openhpc_config overridden?
71-
Additional slurm.conf parameters should be provided using variable openhpc_config_extra.
72-
success_msg: Checked Slurm will be configured for configless operation
73-
delegate_to: localhost
74-
run_once: true
7567

7668
- name: Validate filebeat configuration
7769
hosts: filebeat

docs/experimental/isolated-clusters.md

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,7 @@ created by cookiecutter for a new environment.
1212
The full list of features and whether they are functional on such an "isolated"
1313
network is shown in the table below. Note that:
1414

15-
1. The `hpl` test from the `ansible/adhoc/hpctests.yml` playbook is not
16-
functional and must be skipped using:
17-
18-
```shell
19-
ansible-playbook ansible/adhoc/hpctests.yml --skip-tags hpl-solo
20-
```
21-
22-
2. Using [EESSI](https://www.eessi.io/docs/) necessarily requires outbound
15+
- Using [EESSI](https://www.eessi.io/docs/) necessarily requires outbound
2316
network access for the CernVM File System. If security groups are not
2417
sufficient to restrict this:
2518
a. If outbound http is available, an authenticated proxy could be used,
@@ -54,7 +47,7 @@ See above for definition of "Default" features. In the "Isolated?" column:
5447
| freeipa_client | - | Y - image build required |
5548
| gateway | n/a | n/a - build only |
5649
| grafana | Y | Y |
57-
| hpctests | Y | Y - except hpl-solo, see above |
50+
| hpctests | Y | Y |
5851
| k3s_agent | - | ? |
5952
| k3s_server | - | ? |
6053
| k9s | - | ? |
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"cluster_image": {
3-
"RL8": "openhpc-RL8-250708-1502-1494192e",
4-
"RL9": "openhpc-RL9-250708-1547-1494192e"
3+
"RL8": "openhpc-RL8-250805-1410-35724c15",
4+
"RL9": "openhpc-RL9-250805-1409-35724c15"
55
}
66
}

environments/common/inventory/group_vars/all/openhpc.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ openhpc_packages_default:
4343
- podman-compose
4444
openhpc_packages_extra: []
4545
openhpc_packages: "{{ (openhpc_packages_default + openhpc_packages_extra) | select | list }}"
46-
openhpc_munge_key: "{{ vault_openhpc_mungekey | b64decode }}"
46+
openhpc_munge_key_b64: "{{ vault_openhpc_mungekey }}"
4747
openhpc_login_only_nodes: login
4848
openhpc_state_save_location: "{{ appliances_state_dir + '/slurmctld' if appliances_state_dir is defined else '/var/spool' }}"
4949

@@ -53,8 +53,6 @@ openhpc_config_extra: {}
5353

5454
# default additional slurm.conf parameters for the appliance:
5555
openhpc_config_default:
56-
SlurmctldParameters:
57-
- enable_configless
5856
TaskPlugin: task/cgroup,task/affinity
5957
ReturnToService: 2 # workaround for templating bug TODO: Remove once on stackhpc.openhpc v1.2.0
6058
TopologyPlugin: "topology/{{ 'tree' if (topology_nodes | length) > 0 else 'flat' }}"

0 commit comments

Comments
 (0)