From 341094766cd3908cdd6620ba591749e8772ec515 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Thu, 8 May 2025 10:30:15 +0100 Subject: [PATCH] Update openhpc.yml Fix some issues with partition config: - "Default" partition missing partition_params and default. Although actually, I suspect there is no matching inventory group and hence no nodes in it - so probably the entire partition is irrelevant and should be removed? - default values should be strings, bit worried bare NO/YES do some horrible Ansible autoconversion to bool - made partition params case consistent, although actually Slurm won't care. --- .../vtest/inventory/group_vars/all/openhpc.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/environments/vtest/inventory/group_vars/all/openhpc.yml b/environments/vtest/inventory/group_vars/all/openhpc.yml index eb59677ab..b1d5aeef5 100644 --- a/environments/vtest/inventory/group_vars/all/openhpc.yml +++ b/environments/vtest/inventory/group_vars/all/openhpc.yml @@ -18,9 +18,11 @@ openhpc_packages_extra: "{{ openhpc_packages_extra_nrel }}" openhpc_slurm_partitions: - name: "DEFAULT" - PreemptMode: "OFF" + default: "NO" + partition_params: + PreemptMode: "OFF" - name: "gpu" - default: NO + default: "NO" maxtime: "2-0" # 2 days 0 hours groups: - name: "gpu0" @@ -34,13 +36,13 @@ openhpc_slurm_partitions: Features: "gpu,a100,a100-40" gres: - conf: "gpu:a100:4" - file: "/dev/nvidia[0-3]" + file: "/dev/nvidia[0-3]" - name: "gpu3" node_params: Features: "gpu,a100,a100-80" gres: - conf: "gpu:a100:8" - file: "/dev/nvidia[0-7]" + file: "/dev/nvidia[0-7]" partition_params: DefMemPerCPU: "1024" QOS: "p_gpu" @@ -49,10 +51,10 @@ openhpc_slurm_partitions: #- conf: "gpu:a100:1" #file: "/dev/nvidia0" - name: "lg" - default: YES + default: "YES" maxtime: "2-0" # 2 days 0 hours partition_params: - qos: "p_lg" + QOS: "p_lg" OverSubscribe: "no" DefMemPerCPU: "1024"