From 489a2403ba5f2bf35e78272bc1950d089848c11b Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 28 Aug 2025 17:44:44 +0100 Subject: [PATCH 01/11] bump Slurm to v2.3 and adjust caas inventory to work --- roles/azimuth_caas_operator/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 3fd02d059..933bdfee3 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -97,7 +97,7 @@ azimuth_caas_stackhpc_slurm_appliance_enabled: "{{ azimuth_clusters_enabled }}" # The git URL for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_git_url: https://github.com/stackhpc/ansible-slurm-appliance.git # The git version for the StackHPC Slurm appliance -azimuth_caas_stackhpc_slurm_appliance_git_version: v1.160 +azimuth_caas_stackhpc_slurm_appliance_git_version: v2.3 # The playbook to use for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_playbook: ansible/site.yml # The timeout to apply to the k8s jobs which create, update & delete platform instances @@ -196,7 +196,7 @@ azimuth_caas_stackhpc_slurm_appliance_template: jobTimeout: "{{ azimuth_caas_stackhpc_slurm_appliance_job_timeout_seconds }}" envVars: # Normally set through environment's activate script: - ANSIBLE_INVENTORY: environments/common/inventory,environments/.caas/inventory # NB: Relative to runner project dir + ANSIBLE_INVENTORY: environments/common/inventory,environments/site/inventory,environments/.caas/inventory # NB: Relative to runner project dir # Indicates if the workstation should be enabled azimuth_caas_workstation_enabled: >- From 1bd2a37a43c4820d2b8e105d730339e58611301b Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 29 Aug 2025 13:35:49 +0100 Subject: [PATCH 02/11] temp slurm sha test --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 933bdfee3..15aeff93e 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -97,7 +97,7 @@ azimuth_caas_stackhpc_slurm_appliance_enabled: "{{ azimuth_clusters_enabled }}" # The git URL for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_git_url: https://github.com/stackhpc/ansible-slurm-appliance.git # The git version for the StackHPC Slurm appliance -azimuth_caas_stackhpc_slurm_appliance_git_version: v2.3 +azimuth_caas_stackhpc_slurm_appliance_git_version: 2438e92c1576b870784c31355ac43a5279223ae3 # The playbook to use for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_playbook: ansible/site.yml # The timeout to apply to the k8s jobs which create, update & delete platform instances From e668e3ad2ad2e5ce9461fd909320d32a4249bbdc Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:10:58 +0100 Subject: [PATCH 03/11] bump slurm appliance v2.4 --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 15aeff93e..514da57cf 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -97,7 +97,7 @@ azimuth_caas_stackhpc_slurm_appliance_enabled: "{{ azimuth_clusters_enabled }}" # The git URL for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_git_url: https://github.com/stackhpc/ansible-slurm-appliance.git # The git version for the StackHPC Slurm appliance -azimuth_caas_stackhpc_slurm_appliance_git_version: 2438e92c1576b870784c31355ac43a5279223ae3 +azimuth_caas_stackhpc_slurm_appliance_git_version: v2.4 # The playbook to use for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_playbook: ansible/site.yml # The timeout to apply to the k8s jobs which create, update & delete platform instances From 421a81e986bfeff91ff8b11984f965bb425858ec Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Tue, 2 Sep 2025 16:08:59 +0100 Subject: [PATCH 04/11] Update main.yml --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 514da57cf..2b1d346ba 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -2,7 +2,7 @@ # The chart to use azimuth_caas_operator_chart_repo: https://azimuth-cloud.github.io/azimuth-caas-operator azimuth_caas_operator_chart_name: azimuth-caas-operator -azimuth_caas_operator_chart_version: 0.17.6 +azimuth_caas_operator_chart_version: 0.18.0 # Release information for the operator release # Use the same namespace as Azimuth by default From 77ba39b611bc98bd3ee2a4a300e608be4a861800 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Wed, 3 Sep 2025 12:14:57 +0100 Subject: [PATCH 05/11] TEMPORARY CHANGE: v2.4 slurm image --- roles/azimuth_caas_operator/defaults/main.yml | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 2b1d346ba..f1e450c5f 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -119,23 +119,7 @@ azimuth_caas_stackhpc_slurm_appliance_metadata_url: >- # The ID of the image to use with the StackHPC Slurm appliance # Support the old name for backwards compatibility # By default, use the openhpc image from community images if available -azimuth_caas_stackhpc_slurm_appliance_image: >- - {{- - azimuth_caas_stackhpc_slurm_appliance_rocky8_image - if azimuth_caas_stackhpc_slurm_appliance_rocky8_image is defined - else ( - community_images_image_ids.openhpc - if ( - community_images_image_ids is defined and - 'openhpc' in community_images_image_ids - ) - else ( - undef(hint = 'azimuth_caas_stackhpc_slurm_appliance_image is required') - if azimuth_caas_stackhpc_slurm_appliance_enabled - else None - ) - ) - }} +azimuth_caas_stackhpc_slurm_appliance_image: openhpc-RL9-250808-1727-faa44755 # Combine default and any extra workstation ansible collections azimuth_caas_stackhpc_slurm_appliance_init_collections_default: >- {{ ansible_init_default_collections | default([]) }} From 32aaa8d9d234c3f8c5f604055da58445dd74be05 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Wed, 3 Sep 2025 14:15:57 +0100 Subject: [PATCH 06/11] repa --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index f1e450c5f..f146fa506 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -119,7 +119,7 @@ azimuth_caas_stackhpc_slurm_appliance_metadata_url: >- # The ID of the image to use with the StackHPC Slurm appliance # Support the old name for backwards compatibility # By default, use the openhpc image from community images if available -azimuth_caas_stackhpc_slurm_appliance_image: openhpc-RL9-250808-1727-faa44755 +azimuth_caas_stackhpc_slurm_appliance_image: 7ca99016-c342-4557-8a8d-9a856e934b58 # Combine default and any extra workstation ansible collections azimuth_caas_stackhpc_slurm_appliance_init_collections_default: >- {{ ansible_init_default_collections | default([]) }} From 55f0f993ddc3ba0028bfb414c5a15e85f13a45ee Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Fri, 5 Sep 2025 14:10:38 +0100 Subject: [PATCH 07/11] temp point to slurm branch for testing --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index f146fa506..571dccdb1 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -97,7 +97,7 @@ azimuth_caas_stackhpc_slurm_appliance_enabled: "{{ azimuth_clusters_enabled }}" # The git URL for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_git_url: https://github.com/stackhpc/ansible-slurm-appliance.git # The git version for the StackHPC Slurm appliance -azimuth_caas_stackhpc_slurm_appliance_git_version: v2.4 +azimuth_caas_stackhpc_slurm_appliance_git_version: fix/caas-secrets # The playbook to use for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_playbook: ansible/site.yml # The timeout to apply to the k8s jobs which create, update & delete platform instances From a738d839384b29b5d4412b68c966744eb2479d64 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 8 Sep 2025 11:14:22 +0100 Subject: [PATCH 08/11] update slurm community image and bucket source --- roles/azimuth_caas_operator/defaults/main.yml | 18 +++++++++++++++++- roles/community_images/defaults/main.yml | 8 ++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 571dccdb1..5ad34f6f1 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -119,7 +119,23 @@ azimuth_caas_stackhpc_slurm_appliance_metadata_url: >- # The ID of the image to use with the StackHPC Slurm appliance # Support the old name for backwards compatibility # By default, use the openhpc image from community images if available -azimuth_caas_stackhpc_slurm_appliance_image: 7ca99016-c342-4557-8a8d-9a856e934b58 +azimuth_caas_stackhpc_slurm_appliance_image: >- + {{- + azimuth_caas_stackhpc_slurm_appliance_rocky8_image + if azimuth_caas_stackhpc_slurm_appliance_rocky8_image is defined + else ( + community_images_image_ids.openhpc + if ( + community_images_image_ids is defined and + 'openhpc' in community_images_image_ids + ) + else ( + undef(hint = 'azimuth_caas_stackhpc_slurm_appliance_image is required') + if azimuth_caas_stackhpc_slurm_appliance_enabled + else None + ) + ) + }} # Combine default and any extra workstation ansible collections azimuth_caas_stackhpc_slurm_appliance_init_collections_default: >- {{ ansible_init_default_collections | default([]) }} diff --git a/roles/community_images/defaults/main.yml b/roles/community_images/defaults/main.yml index 764af52f0..8af0d54df 100644 --- a/roles/community_images/defaults/main.yml +++ b/roles/community_images/defaults/main.yml @@ -84,12 +84,12 @@ community_images_azimuth_images: |- # Slurm images are published by the ansible-slurm-appliance repo - https://github.com/stackhpc/ansible-slurm-appliance/ community_images_slurm_base_url: >- - https://object.arcus.openstack.hpc.cam.ac.uk/swift/v1/AUTH_3a06571936a0424bb40bc5c672c4ccb1/openhpc-images + https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/openhpc-images community_images_slurm: - # from https://github.com/stackhpc/ansible-slurm-appliance/releases/tag/v1.159 + # from https://github.com/stackhpc/ansible-slurm-appliance/releases/tag/v2.4 openhpc: - name: openhpc-RL9-250506-1259-abb6394b - source_url: "{{ community_images_slurm_base_url }}/openhpc-RL9-250506-1259-abb6394b" + name: openhpc-RL9-250808-1727-faa44755 + source_url: "{{ community_images_slurm_base_url }}/openhpc-RL9-250808-1727-faa44755" source_disk_format: qcow2 container_format: bare From 0fd9bd5be176e3d7247f08d6dcf1f2dcadc1cea3 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Wed, 10 Sep 2025 00:18:36 +0100 Subject: [PATCH 09/11] bump v2.4.2 slurm --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 5ad34f6f1..89fc0c451 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -97,7 +97,7 @@ azimuth_caas_stackhpc_slurm_appliance_enabled: "{{ azimuth_clusters_enabled }}" # The git URL for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_git_url: https://github.com/stackhpc/ansible-slurm-appliance.git # The git version for the StackHPC Slurm appliance -azimuth_caas_stackhpc_slurm_appliance_git_version: fix/caas-secrets +azimuth_caas_stackhpc_slurm_appliance_git_version: v2.4.2 # The playbook to use for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_playbook: ansible/site.yml # The timeout to apply to the k8s jobs which create, update & delete platform instances From 5b0397526bb290a36b1997b6565f10114935d386 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Wed, 10 Sep 2025 09:21:56 +0100 Subject: [PATCH 10/11] Update main.yml --- roles/azimuth_caas_operator/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/azimuth_caas_operator/defaults/main.yml b/roles/azimuth_caas_operator/defaults/main.yml index 89fc0c451..3dd078856 100644 --- a/roles/azimuth_caas_operator/defaults/main.yml +++ b/roles/azimuth_caas_operator/defaults/main.yml @@ -97,7 +97,7 @@ azimuth_caas_stackhpc_slurm_appliance_enabled: "{{ azimuth_clusters_enabled }}" # The git URL for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_git_url: https://github.com/stackhpc/ansible-slurm-appliance.git # The git version for the StackHPC Slurm appliance -azimuth_caas_stackhpc_slurm_appliance_git_version: v2.4.2 +azimuth_caas_stackhpc_slurm_appliance_git_version: v2.5 # The playbook to use for the StackHPC Slurm appliance azimuth_caas_stackhpc_slurm_appliance_playbook: ansible/site.yml # The timeout to apply to the k8s jobs which create, update & delete platform instances From 0a368e9d86492488e2272cf3000d031000ddb497 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Wed, 10 Sep 2025 10:52:15 +0100 Subject: [PATCH 11/11] Update main.yml --- roles/community_images/defaults/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/community_images/defaults/main.yml b/roles/community_images/defaults/main.yml index 8af0d54df..851858b83 100644 --- a/roles/community_images/defaults/main.yml +++ b/roles/community_images/defaults/main.yml @@ -86,10 +86,10 @@ community_images_azimuth_images: |- community_images_slurm_base_url: >- https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/openhpc-images community_images_slurm: - # from https://github.com/stackhpc/ansible-slurm-appliance/releases/tag/v2.4 + # from https://github.com/stackhpc/ansible-slurm-appliance/releases/tag/v2.5 openhpc: - name: openhpc-RL9-250808-1727-faa44755 - source_url: "{{ community_images_slurm_base_url }}/openhpc-RL9-250808-1727-faa44755" + name: openhpc-RL9-250908-2047-d90ebd0e + source_url: "{{ community_images_slurm_base_url }}/openhpc-RL9-250908-2047-d90ebd0e" source_disk_format: qcow2 container_format: bare