From 9f53fd82a8794a2903a683688ab5a570f0572236 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 19:13:02 +0000 Subject: [PATCH 01/13] wip - bump openhpc role for testing --- requirements.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.yml b/requirements.yml index 13c6b77fe..f8d9d27a9 100644 --- a/requirements.yml +++ b/requirements.yml @@ -4,7 +4,7 @@ roles: version: v25.3.2 name: stackhpc.nfs - src: https://github.com/stackhpc/ansible-role-openhpc.git - version: v1.4.0 + version: feat/auto-gres # TODO: bump to release name: stackhpc.openhpc - src: https://github.com/stackhpc/ansible-node-exporter.git version: stackhpc From d23904d4c2ff11ecdde67bd3356acbc7d2e4ee5d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 10 Oct 2025 14:04:22 +0000 Subject: [PATCH 02/13] remove GresTypes from MIG docs --- docs/mig.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/mig.md b/docs/mig.md index b8eeae8ad..fcef06f1d 100644 --- a/docs/mig.md +++ b/docs/mig.md @@ -215,9 +215,6 @@ openhpc_nodegroups: - conf: "gpu:nvidia_h100_80gb_hbm3_4g.40gb:2" - conf: "gpu:nvidia_h100_80gb_hbm3_1g.10gb:6" -openhpc_config: - GresTypes: - - gpu ``` Making sure the types (the identifier after `gpu:`) match those collected with `slurmd -G`. Substrings From 67c93f0bba83b9cf389263682692ba24992cb5c3 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 10 Oct 2025 14:50:16 +0000 Subject: [PATCH 03/13] enable nvml autoconfiguration for CaaS --- environments/.caas/inventory/group_vars/all/openhpc.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environments/.caas/inventory/group_vars/all/openhpc.yml b/environments/.caas/inventory/group_vars/all/openhpc.yml index 56c8b907d..e3ad4c0e4 100644 --- a/environments/.caas/inventory/group_vars/all/openhpc.yml +++ b/environments/.caas/inventory/group_vars/all/openhpc.yml @@ -4,3 +4,6 @@ openhpc_cluster_name: "{{ cluster_name }}" # Provision a single "standard" compute nodegroup using the supplied # node count and flavor openhpc_nodegroups: "{{ hostvars[groups['openstack'][0]]['openhpc_nodegroups'] }}" + +# Enable autoconfiguration of NVIDIA GPUs, if using a suitable (`cuda`) image: +openhpc_gres_autodetect: nvml From 0ed4fab690b55f6f49e8d55821e706965649bd07 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 10 Oct 2025 16:07:28 +0000 Subject: [PATCH 04/13] fix linter problems --- docs/mig.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/mig.md b/docs/mig.md index fcef06f1d..092629fed 100644 --- a/docs/mig.md +++ b/docs/mig.md @@ -214,7 +214,6 @@ openhpc_nodegroups: - conf: "gpu:nvidia_h100_80gb_hbm3:2" - conf: "gpu:nvidia_h100_80gb_hbm3_4g.40gb:2" - conf: "gpu:nvidia_h100_80gb_hbm3_1g.10gb:6" - ``` Making sure the types (the identifier after `gpu:`) match those collected with `slurmd -G`. Substrings From 81b0759714b2d8ba69f778fda017e9cfe01ec39b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 23 Oct 2025 13:55:57 +0100 Subject: [PATCH 05/13] add support for GRES to ondemand desktop,matlab,rstudio apps --- ansible/roles/openondemand/README.md | 6 +- ansible/roles/openondemand/defaults/main.yml | 3 + .../openondemand/filter_plugins/filters.py | 42 ++++++++++ ansible/roles/openondemand/tasks/main.yml | 10 +++ .../inventory/group_vars/all/openondemand.yml | 82 ++++++++++++++++++- 5 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 ansible/roles/openondemand/filter_plugins/filters.py diff --git a/ansible/roles/openondemand/README.md b/ansible/roles/openondemand/README.md index b1fb6731c..5964c5b32 100644 --- a/ansible/roles/openondemand/README.md +++ b/ansible/roles/openondemand/README.md @@ -73,7 +73,11 @@ This role enables SSL on the Open Ondemand server, using the following self-sign - `openondemand_desktop_screensaver`: Optional. Whether to enable screen locking/screensaver. **NB:** Users must have passwords if this is enabled. Bool, default `false`. - `openondemand_filesapp_paths`: List of paths (in addition to $HOME, which is always added) to include shortcuts to within the Files dashboard app. - `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers. Requires a corresponding group named "openondemand_jupyter" and entry in openhpc_partitions. - +- `openondemand_gres_options`: Optional. A list of `[label, value]` items used + to provide a drop-down for resource/GRES selection in application forms. The + default constructs a list from all GRES definitions in the cluster. See the + `option` attribute of the Select Field [form widget](https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets). + ### Monitoring - `openondemand_exporter`: Optional. Install the Prometheus [ondemand_exporter](https://github.com/OSC/ondemand_exporter) on the `openondemand` node to export metrics about Open Ondemand itself. Default `true`. diff --git a/ansible/roles/openondemand/defaults/main.yml b/ansible/roles/openondemand/defaults/main.yml index 86fb49f20..72555881b 100644 --- a/ansible/roles/openondemand/defaults/main.yml +++ b/ansible/roles/openondemand/defaults/main.yml @@ -102,6 +102,9 @@ openondemand_osc_ood_defaults: ood_auth_openidc: "{{ openondemand_auth_defaults.oidc.ood_auth_openidc if (openondemand_auth | lower) == 'oidc' else none }}" httpd_auth: "{{ openondemand_auth_defaults[openondemand_auth | lower].httpd_auth }}" +# Apps: openondemand_code_server_version: 4.102.2 openondemand_rstudio_version: 2025.05.1-513 openondemand_matlab_version: '' +# Below +openondemand_gres_options: "{{ _openondemand_sinfo_gres.stdout | to_gres_options }}" \ No newline at end of file diff --git a/ansible/roles/openondemand/filter_plugins/filters.py b/ansible/roles/openondemand/filter_plugins/filters.py new file mode 100644 index 000000000..1bf6d8d90 --- /dev/null +++ b/ansible/roles/openondemand/filter_plugins/filters.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# pylint: disable=missing-module-docstring + +# Copyright: (c) 2025, StackHPC +# Apache 2 License + +def to_gres_options(stdout): + gres_data = {} # k=gres_opt, v=[label, max_count] # where gres_opt is what would be passed to --gres + gres_data['none'] = ['None', 0] + + for line in stdout.splitlines(): + if '(null)' in line: + continue + partition, gres = line.split(' ') + gres_name, gres_type, gres_number_cores = gres.split(':', maxsplit=2) + gres_count, gres_cores = gres_number_cores.split('(') + + for gres_opt in [gres_name, f'{gres_name}:{gres_type}']: + if gres_opt not in gres_data: + label = f'{gres_type} {gres_name}' if ':' in gres_opt else f'Any {gres_opt}' + gres_data[gres_opt] = [label, gres_count] + elif gres_count > gres_data[gres_name][1]: + gres_data[gres_opt][1] = gres_count + gres_options = [] + for gres_opt in gres_data: + max_count = gres_data[gres_opt][1] + label = gres_data[gres_opt][0] + if gres_opt != 'none': + label += f' (max count: {max_count})' + gres_options.append((label, gres_opt)) + return gres_options + + +# pylint: disable=useless-object-inheritance +class FilterModule(object): + """Ansible core jinja2 filters""" + + # pylint: disable=missing-function-docstring + def filters(self): + return { + "to_gres_options": to_gres_options, + } diff --git a/ansible/roles/openondemand/tasks/main.yml b/ansible/roles/openondemand/tasks/main.yml index 783be8911..f2c97bfc6 100644 --- a/ansible/roles/openondemand/tasks/main.yml +++ b/ansible/roles/openondemand/tasks/main.yml @@ -39,6 +39,16 @@ vars_from: main.yml public: true +- name: Get GRES information + command: + cmd: sinfo --noheader --format "%R %G" # can't use , or : as separator + register: _openondemand_sinfo_gres + # partition_name gres + # e.g. + # gpu gpu:H200:8(S:0-1) + # normal (null) + # no_smi (null) + - ansible.builtin.include_role: name: osc.ood tasks_from: install-apps.yml diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index 1f7859a96..79f0c28ba 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -122,16 +122,30 @@ openondemand_apps_desktop_default: - bc_queue - bc_num_hours - num_cores + - gres + - gres_count - node attributes: desktop: xfce - # bc_account: # i.e. slurm account - # value: root bc_queue: value: "{{ openondemand_desktop_partition | default(none) }}" num_cores: label: Number of cores value: 1 + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 node: label: Node name help: Select a particular node or leave empty to let Slurm pick the next available @@ -144,6 +158,9 @@ openondemand_apps_desktop_default: - <%= "--nodes=1" %> - <%= "--ntasks=#{num_cores}" %> - <%= "--nodelist=#{node}" %> + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_desktop: "{{ {'bc_desktop':openondemand_apps_desktop_default} if openondemand_desktop_partition | default(none) else {} }}" # yamllint disable-line rule:line-length @@ -158,12 +175,32 @@ openondemand_apps_jupyter_default: - bc_queue - bc_num_hours - num_cores + - gres + - gres_count - node attributes: # TODO num_cores: label: Number of cores value: 1 modules: "" + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 + node: + label: Node name + help: Select a particular node or leave empty to let Slurm pick the next available + value: "" extra_jupyter_args: "" bc_queue: value: "{{ openondemand_jupyter_partition | default(none) }}" @@ -182,6 +219,9 @@ openondemand_apps_jupyter_default: - <%= "--nodes=1" %> - <%= "--ntasks=#{num_cores}" %> - <%= "--nodelist=#{node}" %> + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_jupyter: "{{ {'jupyter':openondemand_apps_jupyter_default} if openondemand_jupyter_partition | default(none) else {} }}" openondemand_apps_rstudio_default: @@ -233,6 +273,20 @@ openondemand_apps_rstudio_default: bc_email_on_started: false auto_modules_RStudio-Server: default: false + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 form: - bc_queue - rstudio_module @@ -242,6 +296,8 @@ openondemand_apps_rstudio_default: - ram - bc_num_hours - bc_email_on_started + - gres + - gres_count submit: | --- batch_connect: @@ -261,6 +317,9 @@ openondemand_apps_rstudio_default: - "<%= cores.blank? ? 1 : cores.to_i %>"<% if auto_queues.start_with?("gpu") %> - "--gpus-per-task" - "1"<% end %> + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_rstudio: "{{ {'rstudio':openondemand_apps_rstudio_default} if openondemand_rstudio_partition | default(none) else {} }}" openondemand_apps_matlab_default: @@ -274,6 +333,8 @@ openondemand_apps_matlab_default: - matlab_module - cores - ram + - gres + - gres_count attributes: desktop: xfce # bc_account: # i.e. slurm account @@ -314,6 +375,20 @@ openondemand_apps_matlab_default: step: 1 value: 30 cachable: true + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 submit: | --- script: @@ -327,6 +402,9 @@ openondemand_apps_matlab_default: - "<%= ram.blank? ? 4 : ram.to_i %>G" - "--cpus-per-task" - "<%= cores.blank? ? 1 : cores.to_i %>" + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_matlab: "{{ {'matlab':openondemand_apps_matlab_default} if openondemand_matlab_partition | default(none) else {} }}" openondemand_apps_codeserver_default: From 2a9c0f8d3b542b02a380ab4a9de60aaa66f61f24 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 23 Oct 2025 17:00:34 +0100 Subject: [PATCH 06/13] support multiple gres per node --- .../openondemand/filter_plugins/filters.py | 49 ++++++++++++++----- ansible/roles/openondemand/tasks/main.yml | 5 -- .../inventory/group_vars/all/openondemand.yml | 1 - 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/ansible/roles/openondemand/filter_plugins/filters.py b/ansible/roles/openondemand/filter_plugins/filters.py index 1bf6d8d90..abfd78885 100644 --- a/ansible/roles/openondemand/filter_plugins/filters.py +++ b/ansible/roles/openondemand/filter_plugins/filters.py @@ -5,22 +5,47 @@ # Apache 2 License def to_gres_options(stdout): + """ Convert sinfo output into a list of GRES options for an Ondemand `select` + widget. + + Parameters: + stdout: Text from `sinfo --noheader --format "%R %G"` + + Returns a list of [label, value] items. This is the format required for + the `options` attribute of a `select` widget [1] where: + - value (str) is a valid entry for the srun/sbatch --gres option [2]. + - label (str) is a user-friendly label with gres name, gres type and + maximum gres count where relevant. + The returned list will always include an entry for no GRES request. + + For example with a single GRES defined of `gpu:H200:8' the following + entries are returned: + - ['None', 'none'] + - ['Any gpu (max count: 8)', 'gpu'] + - ['H200 gpu' (max count: 8)', 'gpu:H200'] + + [1] https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets + [2] https://slurm.schedmd.com/srun.html#OPT_gres + """ gres_data = {} # k=gres_opt, v=[label, max_count] # where gres_opt is what would be passed to --gres gres_data['none'] = ['None', 0] for line in stdout.splitlines(): - if '(null)' in line: - continue - partition, gres = line.split(' ') - gres_name, gres_type, gres_number_cores = gres.split(':', maxsplit=2) - gres_count, gres_cores = gres_number_cores.split('(') - - for gres_opt in [gres_name, f'{gres_name}:{gres_type}']: - if gres_opt not in gres_data: - label = f'{gres_type} {gres_name}' if ':' in gres_opt else f'Any {gres_opt}' - gres_data[gres_opt] = [label, gres_count] - elif gres_count > gres_data[gres_name][1]: - gres_data[gres_opt][1] = gres_count + partition, gres_definitions = line.split() # e.g. 'part1 gpu:H200:8(S:0-1),test:foo:1', or 'part2 (null)' + for gres in gres_definitions.split(','): + if '(null)' in gres: + continue + gres_name, gres_type, gres_count_cores = gres.split(':', maxsplit=2) + gres_count = gres_count_cores.split('(')[0] # may or may not have the e.g. '(S:0-1)' core definition + for gres_opt in [gres_name, f'{gres_name}:{gres_type}']: + if gres_opt not in gres_data: + label = f'{gres_type} {gres_name}' if ':' in gres_opt else f'Any {gres_opt}' + gres_data[gres_opt] = [label, gres_count] + elif len(gres_data[gres_name]) == 1: + raise ValueError(gres_data[gres_name]) + elif gres_count > gres_data[gres_name][1]: + gres_data[gres_opt][1] = gres_count + gres_options = [] for gres_opt in gres_data: max_count = gres_data[gres_opt][1] diff --git a/ansible/roles/openondemand/tasks/main.yml b/ansible/roles/openondemand/tasks/main.yml index f2c97bfc6..37e4867d0 100644 --- a/ansible/roles/openondemand/tasks/main.yml +++ b/ansible/roles/openondemand/tasks/main.yml @@ -43,11 +43,6 @@ command: cmd: sinfo --noheader --format "%R %G" # can't use , or : as separator register: _openondemand_sinfo_gres - # partition_name gres - # e.g. - # gpu gpu:H200:8(S:0-1) - # normal (null) - # no_smi (null) - ansible.builtin.include_role: name: osc.ood diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index 79f0c28ba..23efb43e4 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -204,7 +204,6 @@ openondemand_apps_jupyter_default: extra_jupyter_args: "" bc_queue: value: "{{ openondemand_jupyter_partition | default(none) }}" - node: "" submit: | --- batch_connect: From 84bd1f49323ae1757523b74ab505a2429ecab72b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 11:45:34 +0100 Subject: [PATCH 07/13] fix linter errors --- ansible/roles/openondemand/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/openondemand/README.md b/ansible/roles/openondemand/README.md index 5964c5b32..a215173b1 100644 --- a/ansible/roles/openondemand/README.md +++ b/ansible/roles/openondemand/README.md @@ -77,7 +77,7 @@ This role enables SSL on the Open Ondemand server, using the following self-sign to provide a drop-down for resource/GRES selection in application forms. The default constructs a list from all GRES definitions in the cluster. See the `option` attribute of the Select Field [form widget](https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets). - + ### Monitoring - `openondemand_exporter`: Optional. Install the Prometheus [ondemand_exporter](https://github.com/OSC/ondemand_exporter) on the `openondemand` node to export metrics about Open Ondemand itself. Default `true`. From 59f177848caf74c69aad2e4d0d5bc3f21f351072 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 12:02:08 +0100 Subject: [PATCH 08/13] fix pyink lint errors --- .../openondemand/filter_plugins/filters.py | 99 ++++++++++--------- 1 file changed, 55 insertions(+), 44 deletions(-) diff --git a/ansible/roles/openondemand/filter_plugins/filters.py b/ansible/roles/openondemand/filter_plugins/filters.py index abfd78885..ae13df241 100644 --- a/ansible/roles/openondemand/filter_plugins/filters.py +++ b/ansible/roles/openondemand/filter_plugins/filters.py @@ -4,56 +4,67 @@ # Copyright: (c) 2025, StackHPC # Apache 2 License + def to_gres_options(stdout): - """ Convert sinfo output into a list of GRES options for an Ondemand `select` - widget. + """Convert sinfo output into a list of GRES options for an Ondemand `select` + widget. + + Parameters: + stdout: Text from `sinfo --noheader --format "%R %G"` - Parameters: - stdout: Text from `sinfo --noheader --format "%R %G"` + Returns a list of [label, value] items. This is the format required for + the `options` attribute of a `select` widget [1] where: + - value (str) is a valid entry for the srun/sbatch --gres option [2]. + - label (str) is a user-friendly label with gres name, gres type and + maximum gres count where relevant. + The returned list will always include an entry for no GRES request. - Returns a list of [label, value] items. This is the format required for - the `options` attribute of a `select` widget [1] where: - - value (str) is a valid entry for the srun/sbatch --gres option [2]. - - label (str) is a user-friendly label with gres name, gres type and - maximum gres count where relevant. - The returned list will always include an entry for no GRES request. + For example with a single GRES defined of `gpu:H200:8' the following + entries are returned: + - ['None', 'none'] + - ['Any gpu (max count: 8)', 'gpu'] + - ['H200 gpu' (max count: 8)', 'gpu:H200'] - For example with a single GRES defined of `gpu:H200:8' the following - entries are returned: - - ['None', 'none'] - - ['Any gpu (max count: 8)', 'gpu'] - - ['H200 gpu' (max count: 8)', 'gpu:H200'] + [1] https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets + [2] https://slurm.schedmd.com/srun.html#OPT_gres + """ + gres_data = ( + {} + ) # k=gres_opt, v=[label, max_count] # where gres_opt is what would be passed to --gres + gres_data["none"] = ["None", 0] - [1] https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets - [2] https://slurm.schedmd.com/srun.html#OPT_gres - """ - gres_data = {} # k=gres_opt, v=[label, max_count] # where gres_opt is what would be passed to --gres - gres_data['none'] = ['None', 0] - - for line in stdout.splitlines(): - partition, gres_definitions = line.split() # e.g. 'part1 gpu:H200:8(S:0-1),test:foo:1', or 'part2 (null)' - for gres in gres_definitions.split(','): - if '(null)' in gres: - continue - gres_name, gres_type, gres_count_cores = gres.split(':', maxsplit=2) - gres_count = gres_count_cores.split('(')[0] # may or may not have the e.g. '(S:0-1)' core definition - for gres_opt in [gres_name, f'{gres_name}:{gres_type}']: - if gres_opt not in gres_data: - label = f'{gres_type} {gres_name}' if ':' in gres_opt else f'Any {gres_opt}' - gres_data[gres_opt] = [label, gres_count] - elif len(gres_data[gres_name]) == 1: - raise ValueError(gres_data[gres_name]) - elif gres_count > gres_data[gres_name][1]: - gres_data[gres_opt][1] = gres_count + for line in stdout.splitlines(): + partition, gres_definitions = ( + line.split() + ) # e.g. 'part1 gpu:H200:8(S:0-1),test:foo:1', or 'part2 (null)' + for gres in gres_definitions.split(","): + if "(null)" in gres: + continue + gres_name, gres_type, gres_count_cores = gres.split(":", maxsplit=2) + gres_count = gres_count_cores.split("(")[ + 0 + ] # may or may not have the e.g. '(S:0-1)' core definition + for gres_opt in [gres_name, f"{gres_name}:{gres_type}"]: + if gres_opt not in gres_data: + label = ( + f"{gres_type} {gres_name}" + if ":" in gres_opt + else f"Any {gres_opt}" + ) + gres_data[gres_opt] = [label, gres_count] + elif len(gres_data[gres_name]) == 1: + raise ValueError(gres_data[gres_name]) + elif gres_count > gres_data[gres_name][1]: + gres_data[gres_opt][1] = gres_count - gres_options = [] - for gres_opt in gres_data: - max_count = gres_data[gres_opt][1] - label = gres_data[gres_opt][0] - if gres_opt != 'none': - label += f' (max count: {max_count})' - gres_options.append((label, gres_opt)) - return gres_options + gres_options = [] + for gres_opt in gres_data: + max_count = gres_data[gres_opt][1] + label = gres_data[gres_opt][0] + if gres_opt != "none": + label += f" (max count: {max_count})" + gres_options.append((label, gres_opt)) + return gres_options # pylint: disable=useless-object-inheritance From b924286c08a7798f29c8d8b49d2623581b1784ca Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 14:53:22 +0100 Subject: [PATCH 09/13] fix python linting errors --- .../roles/openondemand/filter_plugins/filters.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ansible/roles/openondemand/filter_plugins/filters.py b/ansible/roles/openondemand/filter_plugins/filters.py index ae13df241..a74365fd3 100644 --- a/ansible/roles/openondemand/filter_plugins/filters.py +++ b/ansible/roles/openondemand/filter_plugins/filters.py @@ -27,14 +27,15 @@ def to_gres_options(stdout): [1] https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets [2] https://slurm.schedmd.com/srun.html#OPT_gres - """ - gres_data = ( - {} - ) # k=gres_opt, v=[label, max_count] # where gres_opt is what would be passed to --gres + """ # noqa: E501 pylint: disable=line-too-long + + gres_data = {} + # key=gres_opt, what would be passed to --gres + # value=[label, max_count] gres_data["none"] = ["None", 0] for line in stdout.splitlines(): - partition, gres_definitions = ( + partition, gres_definitions = ( # pylint: disable=unused-variable line.split() ) # e.g. 'part1 gpu:H200:8(S:0-1),test:foo:1', or 'part2 (null)' for gres in gres_definitions.split(","): @@ -58,7 +59,7 @@ def to_gres_options(stdout): gres_data[gres_opt][1] = gres_count gres_options = [] - for gres_opt in gres_data: + for gres_opt in gres_data: # pylint: disable=consider-using-dict-items max_count = gres_data[gres_opt][1] label = gres_data[gres_opt][0] if gres_opt != "none": @@ -68,6 +69,7 @@ def to_gres_options(stdout): # pylint: disable=useless-object-inheritance +# pylint: disable=too-few-public-methods class FilterModule(object): """Ansible core jinja2 filters""" From 37c5a2b8cafc6e13db231ed118613028536d680a Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 15:48:52 +0100 Subject: [PATCH 10/13] fix ansible-lint errors --- ansible/roles/openondemand/defaults/main.yml | 4 ++-- ansible/roles/openondemand/tasks/main.yml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ansible/roles/openondemand/defaults/main.yml b/ansible/roles/openondemand/defaults/main.yml index 72555881b..7a4430690 100644 --- a/ansible/roles/openondemand/defaults/main.yml +++ b/ansible/roles/openondemand/defaults/main.yml @@ -106,5 +106,5 @@ openondemand_osc_ood_defaults: openondemand_code_server_version: 4.102.2 openondemand_rstudio_version: 2025.05.1-513 openondemand_matlab_version: '' -# Below -openondemand_gres_options: "{{ _openondemand_sinfo_gres.stdout | to_gres_options }}" \ No newline at end of file +# Below is automatically calculated during role run: +openondemand_gres_options: "{{ _openondemand_sinfo_gres.stdout | to_gres_options }}" diff --git a/ansible/roles/openondemand/tasks/main.yml b/ansible/roles/openondemand/tasks/main.yml index 37e4867d0..6fb6edb88 100644 --- a/ansible/roles/openondemand/tasks/main.yml +++ b/ansible/roles/openondemand/tasks/main.yml @@ -40,8 +40,9 @@ public: true - name: Get GRES information - command: + ansible.builtin.command: cmd: sinfo --noheader --format "%R %G" # can't use , or : as separator + changed_when: true register: _openondemand_sinfo_gres - ansible.builtin.include_role: From aaeddc3f2339683cafdce1f007d815f2d823c12f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 16:34:33 +0100 Subject: [PATCH 11/13] add partition info to GRES selection --- .../openondemand/filter_plugins/filters.py | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/ansible/roles/openondemand/filter_plugins/filters.py b/ansible/roles/openondemand/filter_plugins/filters.py index a74365fd3..416049930 100644 --- a/ansible/roles/openondemand/filter_plugins/filters.py +++ b/ansible/roles/openondemand/filter_plugins/filters.py @@ -22,29 +22,31 @@ def to_gres_options(stdout): For example with a single GRES defined of `gpu:H200:8' the following entries are returned: - ['None', 'none'] - - ['Any gpu (max count: 8)', 'gpu'] - - ['H200 gpu' (max count: 8)', 'gpu:H200'] + - ['Any gpu (max count=8, partitions=standard,long)', 'gpu'] + - ['H200 gpu (max count=8, partitions=standard,long)', 'gpu:H200'] [1] https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets [2] https://slurm.schedmd.com/srun.html#OPT_gres """ # noqa: E501 pylint: disable=line-too-long gres_data = {} - # key=gres_opt, what would be passed to --gres - # value=[label, max_count] - gres_data["none"] = ["None", 0] + # key=gres_opt - 'name' or 'name:type', i.e. what would be passed to --gres + # value={label:str, max_count: int, partitions=[]} + gres_data["none"] = {'label':'None', 'max_count':0, 'partitions':['all']} for line in stdout.splitlines(): - partition, gres_definitions = ( # pylint: disable=unused-variable - line.split() - ) # e.g. 'part1 gpu:H200:8(S:0-1),test:foo:1', or 'part2 (null)' + # line examples: + # 'part1 gpu:H200:8(S:0-1),test:foo:1' + # 'part2 (null)' + # - First example shows multiple GRES per partition + # - Core suffix e.g. '(S:0-1)' only exists for auto-detected gres + # - stackhpc.openhpc role guarantees that name:type:count all exist + partition, gres_definitions = (line.split()) for gres in gres_definitions.split(","): if "(null)" in gres: continue gres_name, gres_type, gres_count_cores = gres.split(":", maxsplit=2) - gres_count = gres_count_cores.split("(")[ - 0 - ] # may or may not have the e.g. '(S:0-1)' core definition + gres_count = gres_count_cores.split("(")[0] for gres_opt in [gres_name, f"{gres_name}:{gres_type}"]: if gres_opt not in gres_data: label = ( @@ -52,18 +54,19 @@ def to_gres_options(stdout): if ":" in gres_opt else f"Any {gres_opt}" ) - gres_data[gres_opt] = [label, gres_count] - elif len(gres_data[gres_name]) == 1: - raise ValueError(gres_data[gres_name]) - elif gres_count > gres_data[gres_name][1]: - gres_data[gres_opt][1] = gres_count + gres_data[gres_opt] = {'label':label, 'max_count':gres_count, 'partitions':[partition]} + else: + gres_data[gres_opt]['partitions'].append(partition) + if gres_count > gres_data[gres_name]['max_count']: + gres_data[gres_opt]['max_count'] = gres_count gres_options = [] for gres_opt in gres_data: # pylint: disable=consider-using-dict-items - max_count = gres_data[gres_opt][1] - label = gres_data[gres_opt][0] + max_count = gres_data[gres_opt]['max_count'] + partitions = gres_data[gres_opt]['partitions'] + label = gres_data[gres_opt]['label'] if gres_opt != "none": - label += f" (max count: {max_count})" + label += f" (max count={max_count}, partitions={','.join(partitions)})" gres_options.append((label, gres_opt)) return gres_options From 6ed4a2143934b4aa1f2a6e5331e4271428a8e8ef Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 16:35:02 +0100 Subject: [PATCH 12/13] tidy GRES labels --- .../common/inventory/group_vars/all/openondemand.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index 23efb43e4..84a6c2055 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -140,7 +140,7 @@ openondemand_apps_desktop_default: options: "{{ openondemand_gres_options }}" gres_count: label: Resource count - help: Count of GPU or other resources + help: Count of GPU or other Slurm GRES resources required: false widget: number_field value: 1 @@ -191,7 +191,7 @@ openondemand_apps_jupyter_default: options: "{{ openondemand_gres_options }}" gres_count: label: Resource count - help: Count of GPU or other resources + help: Count of GPU or other Slurm GRES resources required: false widget: number_field value: 1 @@ -280,7 +280,7 @@ openondemand_apps_rstudio_default: options: "{{ openondemand_gres_options }}" gres_count: label: Resource count - help: Count of GPU or other resources + help: Count of GPU or other Slurm GRES resources required: false widget: number_field value: 1 @@ -382,7 +382,7 @@ openondemand_apps_matlab_default: options: "{{ openondemand_gres_options }}" gres_count: label: Resource count - help: Count of GPU or other resources + help: Count of GPU or other Slurm GRES resources required: false widget: number_field value: 1 From 4e6ce198b6baf1acc484ee17e46b7b15bb18d14b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 Oct 2025 17:18:33 +0100 Subject: [PATCH 13/13] fix lint errors --- .../openondemand/filter_plugins/filters.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ansible/roles/openondemand/filter_plugins/filters.py b/ansible/roles/openondemand/filter_plugins/filters.py index 416049930..25f7e4aaa 100644 --- a/ansible/roles/openondemand/filter_plugins/filters.py +++ b/ansible/roles/openondemand/filter_plugins/filters.py @@ -32,7 +32,7 @@ def to_gres_options(stdout): gres_data = {} # key=gres_opt - 'name' or 'name:type', i.e. what would be passed to --gres # value={label:str, max_count: int, partitions=[]} - gres_data["none"] = {'label':'None', 'max_count':0, 'partitions':['all']} + gres_data["none"] = {"label": "None", "max_count": 0, "partitions": ["all"]} for line in stdout.splitlines(): # line examples: @@ -41,7 +41,7 @@ def to_gres_options(stdout): # - First example shows multiple GRES per partition # - Core suffix e.g. '(S:0-1)' only exists for auto-detected gres # - stackhpc.openhpc role guarantees that name:type:count all exist - partition, gres_definitions = (line.split()) + partition, gres_definitions = line.split() for gres in gres_definitions.split(","): if "(null)" in gres: continue @@ -54,17 +54,21 @@ def to_gres_options(stdout): if ":" in gres_opt else f"Any {gres_opt}" ) - gres_data[gres_opt] = {'label':label, 'max_count':gres_count, 'partitions':[partition]} + gres_data[gres_opt] = { + "label": label, + "max_count": gres_count, + "partitions": [partition], + } else: - gres_data[gres_opt]['partitions'].append(partition) - if gres_count > gres_data[gres_name]['max_count']: - gres_data[gres_opt]['max_count'] = gres_count + gres_data[gres_opt]["partitions"].append(partition) + if gres_count > gres_data[gres_name]["max_count"]: + gres_data[gres_opt]["max_count"] = gres_count gres_options = [] for gres_opt in gres_data: # pylint: disable=consider-using-dict-items - max_count = gres_data[gres_opt]['max_count'] - partitions = gres_data[gres_opt]['partitions'] - label = gres_data[gres_opt]['label'] + max_count = gres_data[gres_opt]["max_count"] + partitions = gres_data[gres_opt]["partitions"] + label = gres_data[gres_opt]["label"] if gres_opt != "none": label += f" (max count={max_count}, partitions={','.join(partitions)})" gres_options.append((label, gres_opt))