diff --git a/ansible/roles/openondemand/README.md b/ansible/roles/openondemand/README.md index b1fb6731c..a215173b1 100644 --- a/ansible/roles/openondemand/README.md +++ b/ansible/roles/openondemand/README.md @@ -73,6 +73,10 @@ This role enables SSL on the Open Ondemand server, using the following self-sign - `openondemand_desktop_screensaver`: Optional. Whether to enable screen locking/screensaver. **NB:** Users must have passwords if this is enabled. Bool, default `false`. - `openondemand_filesapp_paths`: List of paths (in addition to $HOME, which is always added) to include shortcuts to within the Files dashboard app. - `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers. Requires a corresponding group named "openondemand_jupyter" and entry in openhpc_partitions. +- `openondemand_gres_options`: Optional. A list of `[label, value]` items used + to provide a drop-down for resource/GRES selection in application forms. The + default constructs a list from all GRES definitions in the cluster. See the + `options` attribute of the Select Field [form widget](https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets). 
def _split_gres_definitions(definitions):
    """Split a sinfo GRES field on the commas that separate GRES entries.

    Commas inside a parenthesised core/socket suffix (e.g. '(S:0,2)') are
    part of a single entry and must not be treated as separators.
    """
    items = []
    current = []
    depth = 0
    for char in definitions:
        if char == "(":
            depth += 1
        elif char == ")":
            depth = max(depth - 1, 0)
        if char == "," and depth == 0:
            items.append("".join(current))
            current = []
        else:
            current.append(char)
    items.append("".join(current))
    return items


def _gres_count_value(count):
    """Return the numeric value of a GRES count string, for comparisons.

    Handles plain integers ('8') and the K/M/G/T/P multiplier suffixes Slurm
    accepts on GRES counts ('4G'). Anything unparseable compares as 0 so that
    an unexpected count never aborts generation of the options list.
    """
    multipliers = {
        "K": 1024,
        "M": 1024**2,
        "G": 1024**3,
        "T": 1024**4,
        "P": 1024**5,
    }
    suffix = count[-1:].upper()
    try:
        if suffix in multipliers:
            return int(count[:-1]) * multipliers[suffix]
        return int(count)
    except ValueError:
        return 0


def to_gres_options(stdout):
    """Convert sinfo output into a list of GRES options for an Ondemand `select`
    widget.

    Parameters:
        stdout: Text from `sinfo --noheader --format "%R %G"`

    Returns a list of (label, value) pairs. This is the format required for
    the `options` attribute of a `select` widget [1] where:
    - value (str) is a valid entry for the srun/sbatch --gres option [2].
    - label (str) is a user-friendly label with gres name, gres type and
      maximum gres count where relevant.
    The returned list will always include an entry for no GRES request.

    For example with a single GRES defined of 'gpu:H200:8' the following
    entries are returned:
    - ('None', 'none')
    - ('Any gpu (max count=8, partitions=standard,long)', 'gpu')
    - ('H200 gpu (max count=8, partitions=standard,long)', 'gpu:H200')

    Raises ValueError if a non-blank line is not exactly '<partition> <gres>'
    or a GRES entry does not have the name:type:count form.

    [1] https://osc.github.io/ood-documentation/latest/how-tos/app-development/interactive/form-widgets.html#form-widgets
    [2] https://slurm.schedmd.com/srun.html#OPT_gres
    """  # noqa: E501 pylint: disable=line-too-long

    # key=gres_opt - 'name' or 'name:type', i.e. what would be passed to --gres
    # value={label: str, max_count: str (as displayed), max_value: int (for
    #        comparisons), partitions: [str]}
    gres_data = {
        "none": {
            "label": "None",
            "max_count": "0",
            "max_value": 0,
            "partitions": ["all"],
        },
    }

    for line in stdout.splitlines():
        # line examples:
        #   'part1 gpu:H200:8(S:0-1),test:foo:1'
        #   'part2 (null)'
        # - First example shows multiple GRES per partition
        # - Core suffix e.g. '(S:0-1)' only exists for auto-detected gres
        # - stackhpc.openhpc role guarantees that name:type:count all exist
        if not line.strip():
            continue
        partition, gres_definitions = line.split()
        for gres in _split_gres_definitions(gres_definitions):
            if "(null)" in gres:
                continue
            gres_name, gres_type, gres_count_cores = gres.split(":", maxsplit=2)
            # Drop the core suffix, then keep only the last ':' segment so a
            # flag such as 'no_consume:4G' does not end up in the count.
            gres_count = gres_count_cores.split("(")[0].split(":")[-1]
            count_value = _gres_count_value(gres_count)
            candidates = (
                (gres_name, f"Any {gres_name}"),
                (f"{gres_name}:{gres_type}", f"{gres_type} {gres_name}"),
            )
            for gres_opt, label in candidates:
                entry = gres_data.get(gres_opt)
                if entry is None:
                    gres_data[gres_opt] = {
                        "label": label,
                        "max_count": gres_count,
                        "max_value": count_value,
                        "partitions": [partition],
                    }
                    continue
                # A partition may define several GRES of the same name; only
                # list it once per option.
                if partition not in entry["partitions"]:
                    entry["partitions"].append(partition)
                # Compare numerically, and against this option's own maximum.
                if count_value > entry["max_value"]:
                    entry["max_count"] = gres_count
                    entry["max_value"] = count_value

    gres_options = []
    for gres_opt, entry in gres_data.items():
        label = entry["label"]
        if gres_opt != "none":
            max_count = entry["max_count"]
            partitions = ",".join(entry["partitions"])
            label += f" (max count={max_count}, partitions={partitions})"
        gres_options.append((label, gres_opt))
    return gres_options


# pylint: disable=useless-object-inheritance
# pylint: disable=too-few-public-methods
class FilterModule(object):
    """Ansible core jinja2 filters"""

    # pylint: disable=missing-function-docstring
    def filters(self):
        # Maps the filter name used in templates to its implementation.
        return {
            "to_gres_options": to_gres_options,
        }
slurm account - # value: root bc_queue: value: "{{ openondemand_desktop_partition | default(none) }}" num_cores: label: Number of cores value: 1 + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other Slurm GRES resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 node: label: Node name help: Select a particular node or leave empty to let Slurm pick the next available @@ -144,6 +158,9 @@ openondemand_apps_desktop_default: - <%= "--nodes=1" %> - <%= "--ntasks=#{num_cores}" %> - <%= "--nodelist=#{node}" %> + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_desktop: "{{ {'bc_desktop':openondemand_apps_desktop_default} if openondemand_desktop_partition | default(none) else {} }}" # yamllint disable-line rule:line-length @@ -158,16 +175,35 @@ openondemand_apps_jupyter_default: - bc_queue - bc_num_hours - num_cores + - gres + - gres_count - node attributes: # TODO num_cores: label: Number of cores value: 1 modules: "" + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other Slurm GRES resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 + node: + label: Node name + help: Select a particular node or leave empty to let Slurm pick the next available + value: "" extra_jupyter_args: "" bc_queue: value: "{{ openondemand_jupyter_partition | default(none) }}" - node: "" submit: | --- batch_connect: @@ -182,6 +218,9 @@ openondemand_apps_jupyter_default: - <%= "--nodes=1" %> - <%= "--ntasks=#{num_cores}" %> - <%= "--nodelist=#{node}" %> + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_jupyter: "{{ 
{'jupyter':openondemand_apps_jupyter_default} if openondemand_jupyter_partition | default(none) else {} }}" openondemand_apps_rstudio_default: @@ -233,6 +272,20 @@ openondemand_apps_rstudio_default: bc_email_on_started: false auto_modules_RStudio-Server: default: false + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other Slurm GRES resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 form: - bc_queue - rstudio_module @@ -242,6 +295,8 @@ openondemand_apps_rstudio_default: - ram - bc_num_hours - bc_email_on_started + - gres + - gres_count submit: | --- batch_connect: @@ -261,6 +316,9 @@ openondemand_apps_rstudio_default: - "<%= cores.blank? ? 1 : cores.to_i %>"<% if auto_queues.start_with?("gpu") %> - "--gpus-per-task" - "1"<% end %> + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_rstudio: "{{ {'rstudio':openondemand_apps_rstudio_default} if openondemand_rstudio_partition | default(none) else {} }}" openondemand_apps_matlab_default: @@ -274,6 +332,8 @@ openondemand_apps_matlab_default: - matlab_module - cores - ram + - gres + - gres_count attributes: desktop: xfce # bc_account: # i.e. slurm account @@ -314,6 +374,20 @@ openondemand_apps_matlab_default: step: 1 value: 30 cachable: true + gres: + label: Resources + help: Select GPU or other Slurm GRES resources + required: true + widget: select + options: "{{ openondemand_gres_options }}" + gres_count: + label: Resource count + help: Count of GPU or other Slurm GRES resources + required: false + widget: number_field + value: 1 + min: 1 + step: 1 submit: | --- script: @@ -327,6 +401,9 @@ openondemand_apps_matlab_default: - "<%= ram.blank? ? 4 : ram.to_i %>G" - "--cpus-per-task" - "<%= cores.blank? ? 
1 : cores.to_i %>" + <% if gres != 'none' %> + - <%= "--gres=#{gres}:#{gres_count}" %> + <% end %> openondemand_apps_matlab: "{{ {'matlab':openondemand_apps_matlab_default} if openondemand_matlab_partition | default(none) else {} }}" openondemand_apps_codeserver_default: