diff --git a/.github/workflows/package-build-ofed.yml b/.github/workflows/package-build-ofed.yml index 798e0c4bf..2df246217 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -1,5 +1,5 @@ --- -name: Build OFED packages +name: Build OFED kernel modules on: workflow_dispatch: inputs: @@ -19,11 +19,11 @@ on: env: ANSIBLE_FORCE_COLOR: True - KAYOBE_ENVIRONMENT: ci-builder + KAYOBE_ENVIRONMENT: ci-doca-builder KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} jobs: overcloud-ofed-packages: - name: Build OFED packages + name: Build OFED kernel modules if: github.repository == 'stackhpc/stackhpc-kayobe-config' runs-on: arc-skc-host-image-builder-runner permissions: {} @@ -48,6 +48,11 @@ jobs: BRANCH=$(awk -F'=' '/defaultbranch/ {print $2}' src/kayobe-config/.gitreview) echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," >> $GITHUB_OUTPUT + - name: Generate OFED tag + id: ofed_tag + run: | + echo "ofed_tag=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_OUTPUT + - name: Clone StackHPC Kayobe repository uses: actions/checkout@v4 with: @@ -86,6 +91,7 @@ jobs: id: image_tag run: | echo image_tag=$(grep stackhpc_rocky_9_overcloud_host_image_version: etc/kayobe/pulp-host-image-versions.yml | awk '{print $2}') >> $GITHUB_OUTPUT + working-directory: ${{ github.workspace }}/src/kayobe-config # Use the image override if set, otherwise use overcloud-os_distribution-os_release-tag - name: Output image name @@ -145,13 +151,13 @@ jobs: - name: Write Terraform outputs run: | - cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-outputs.yml + cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-doca-builder/tf-outputs.yml ${{ steps.tf_outputs.outputs.stdout }} EOF - name: Write Terraform network config run: | - cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-network-allocation.yml + cat << EOF > 
src/kayobe-config/etc/kayobe/environments/ci-doca-builder/tf-network-allocation.yml --- aio_ips: builder: "{{ access_ip_v4.value }}" @@ -176,13 +182,13 @@ jobs: - name: Bootstrap the control host run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe control host bootstrap - name: Run growroot playbook run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -190,23 +196,23 @@ jobs: - name: Configure the seed host (Builder VM) run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe seed host configure --skip-tags network,docker + source src/kayobe-config/kayobe-env --environment ci-doca-builder && + kayobe seed host configure --skip-tags network,docker,docker-registry env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} - name: Run a distro-sync run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe seed host command run --become --command "dnf distro-sync --refresh" + source src/kayobe-config/kayobe-env --environment ci-doca-builder && + kayobe seed host command run --become --command "dnf distro-sync --refresh --assumeyes" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} - name: Reset BLS entries on the seed host run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reset-bls-entries.yml \ -e "reset_bls_host=ofed-builder" env: @@ -215,7 +221,7 @@ 
jobs: - name: Disable noexec in /var/tmp run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe seed host command run --become --command "sed -i 's/noexec,//g' /etc/fstab" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -223,7 +229,7 @@ jobs: - name: Reboot to apply the kernel update run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reboot.yml env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -231,7 +237,7 @@ jobs: - name: Run OFED builder playbook run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/build-ofed-rocky.yml env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -239,8 +245,9 @@ jobs: - name: Run OFED upload playbook run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe playbook run src/kayobe-config/etc/kayobe/ansible/push-ofed.yml + source src/kayobe-config/kayobe-env --environment ci-doca-builder && + kayobe playbook run src/kayobe-config/etc/kayobe/ansible/push-ofed.yml \ + -e "ofed_tag=${{ steps.ofed_tag.outputs.ofed_tag }}" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index e53b0f125..c993f6748 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -4,19 +4,17 @@ OFED Warning: Experimental workflow subject to change -This section documents the workflow for building OFED packages for Release train integration. 
- -The workflow builds the OFED kernel modules against the latest available kernel in Release train -(as configured in SKC) and compiles them into RPM packages to be uploaded to Ark. Addtionally, -this workflow downloads the userspace OFED packages from the Nvidia repository and uploads these -to Ark. +The Nvidia DOCA framework is distributed as part of StackHPC Release Train for OFED driver support. +This repository is synced into Ark as part of the Release Train workflows; however, to ensure +compatibility with Release Train packages, we are required to build OFED modules with support for +the latest Release Train kernel. Workflow ======== The workflow uses workflow_dispatch to manually request an OFED build, which will deploy a builder VM, apply kayobe config to the builder, upgrade the kernel, reboot, then run two Ansible playbooks -for building and uploading OFED to Ark. +for building and uploading OFED modules to Ark. Pre-requisites -------------- @@ -25,31 +23,82 @@ Before building OFED packages, the workflow will ensure that: * A full distro-sync has taken place, ensuring the kernel is upgraded. -* The bootloader has been configured to use the latest kernel +* The bootloader has been configured to use the latest kernel (reset-bls-entries.yml) * noexec is disabled in the temporary logical volume. build-ofed ---------- -Currently we only support building Rocky Linux 9 OFED packages. - -In order to setup OFED, we're required to build kernel modules for the OFED drivers as -the kernels we provide in release train are unsupported by OFED. To accomplish this we -will need to use the doca-kernel-support from the doca-extra repository. +Currently we only support building Rocky Linux 9 OFED kernel module packages. -We will need to instll dependencies in order to build the OFED kernel modules, and these -are installed at the beginning of the build playbook. 
We also install base and appstream -dependencies of userspace OFED packages here, this is intended to stop these dependencies -being pulled in later when we download the OFED packages from the doca-host repository. +The Build OFED module workflow will check that the filesystem is configured (noexec disabled) +to allow the DOCA build script to run. The workflow will also install any necessary dependencies +for the module build. -At the end of the playbook following the kernel module build, the OFED userspace packages -are downloaded from the upstream repository in order to upload these to Ark. +The build script will output a ``doca-kernel-repo`` RPM which contains all kernel modules built +as part of the workflow. When this RPM is installed, the repofile is created pointing to the +modules in ``/usr/share/doca-host-/Modules//`` on the host. push-ofed --------- -As we're not syncing OFED from any upstream source, and are instead creating our own -repository of custom packages, we will be required to setup the Pulp distribution/publication -and upload the content directly to Ark. This playbook uses the Pulp CLI to upload the RPMs -to Ark. +As mentioned above, the DOCA repository is synced into the ``doca`` repository in Ark. This workflow +will upload the ``doca-kernel-repo`` RPM to a separate repository named ``doca-modules``. The version +for this repository is set in ``pulp-repo-versions.yml`` and is disabled for local Pulp syncs by +default. + +Install process +=============== + +Release Train configuration +--------------------------- + +DOCA repositories will need to be synced to the local Pulp service. Ensure the DOCA +hosts are added to the ``mlnx`` group before running a package sync; if the group is not +empty, DOCA will be synced into the local Pulp. The local Pulp can be synced with Ark +by running: + +.. 
code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-repo-sync.yml + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-repo-publish.yml + +DOCA repositories can be templated to hosts by running Kayobe host configure. + +.. code-block:: console + + kayobe overcloud host configure -t dnf + +StackHPC DOCA kernel modules will require the latest kernel version available in Ark for +the current Rocky minor version. You should ensure that packages are up to date by running +a package update, which can also be limited to hosts in the ``mlnx`` group. + +.. code-block:: console + + kayobe overcloud host package update --packages "*" --limit mlnx + +To ensure the latest kernel is the default on boot, the bootloader entries will need +to be reset before rebooting. + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reset-bls-entries.yml -e reset_bls_host=mlnx + +The hosts can now be rebooted to use the latest kernel; a rolling reboot may be applicable +here to reduce disruptions. See the `package updates documentation `. + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reboot.yml --limit mlnx + +install-doca +------------ + +A playbook is provided to install DOCA on hosts in the ``mlnx`` group. Ensure this group +is configured to include the hosts you wish to install DOCA on. To run the install +playbook: + +.. 
code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/install-doca.yml diff --git a/etc/kayobe/ansible/build-ofed-rocky.yml b/etc/kayobe/ansible/build-ofed-rocky.yml index d7e925547..343183078 100644 --- a/etc/kayobe/ansible/build-ofed-rocky.yml +++ b/etc/kayobe/ansible/build-ofed-rocky.yml @@ -1,5 +1,5 @@ --- -- name: Build OFED packages +- name: Build OFED kernel modules become: true hosts: ofed-builder gather_facts: false @@ -17,57 +17,29 @@ - name: Install package dependencies ansible.builtin.dnf: name: - - kpartx - - perl - - rpm-build + - autoconf - automake - - patch - - kernel + - createrepo + - cmake-filesystem + - doca-extra + - gcc-gfortran - kernel-devel - - autoconf - - pciutils - - kernel-modules-extra - kernel-rpm-macros - - lsof + - kernel-modules-extra + - kpartx - libtool - - tk - - gcc-gfortran - - tcl - - createrepo - - cmake-filesystem - libnl3-devel + - lsof + - patch + - pciutils + - perl - python3-devel + - rpm-build + - tcl + - tk state: latest update_cache: true - - name: Add DOCA host repository package - ansible.builtin.dnf: - name: "https://developer.nvidia.com/downloads/networking/secure/doca-sdk/DOCA_2.8/doca-host-2.8.0-204000_{{ stackhpc_pulp_doca_ofed_version }}_rhel9{{ stackhpc_pulp_repo_rocky_9_minor_version }}.x86_64.rpm" - disable_gpg_check: true - - - name: Install DOCA extra packages - ansible.builtin.dnf: - name: doca-extra - - - name: Create build directory - ansible.builtin.file: - path: /home/cloud-user/ofed - state: directory - mode: "0777" - - - name: Set build directory - ansible.builtin.replace: - path: /opt/mellanox/doca/tools/doca-kernel-support - regexp: TMP_DIR=\$1 - replace: TMP_DIR=/home/cloud-user/ofed - - name: Build OFED kernel modules - ansible.builtin.shell: - cmd: | - /opt/mellanox/doca/tools/doca-kernel-support - - - name: Download OFED userspace packages - ansible.builtin.dnf: - name: doca-ofed-userspace - download_only: true - download_dir: /home/cloud-user/ofed + 
ansible.builtin.command: + cmd: /opt/mellanox/doca/tools/doca-kernel-support diff --git a/etc/kayobe/ansible/install-doca.yml b/etc/kayobe/ansible/install-doca.yml new file mode 100644 index 000000000..1a0fee8c3 --- /dev/null +++ b/etc/kayobe/ansible/install-doca.yml @@ -0,0 +1,28 @@ +--- +- name: Install DOCA + become: true + hosts: mlnx + gather_facts: true + tasks: + - name: Get running kernel + ansible.builtin.command: + cmd: "uname -r" + register: kernel + + - name: Install kernel repo + ansible.builtin.dnf: + name: doca-kernel-repo + state: latest + update_cache: true + + - name: Ensure correct priority for DOCA modules + ansible.builtin.lineinfile: + line: "priority=-2" + insertafter: EOF + path: "/etc/yum.repos.d/doca-kernel-{{ kernel.stdout }}.repo" + + - name: Install DOCA OFED + ansible.builtin.dnf: + name: doca-ofed + state: latest + update_cache: true diff --git a/etc/kayobe/ansible/push-ofed.yml b/etc/kayobe/ansible/push-ofed.yml index 3b1130c20..49e3f645a 100644 --- a/etc/kayobe/ansible/push-ofed.yml +++ b/etc/kayobe/ansible/push-ofed.yml @@ -1,45 +1,71 @@ --- - name: Push OFED packages hosts: ofed-builder + vars: + venv: "/opt/kayobe/venvs/kayobe" + doca_extract_path: "/home/stack/doca" tasks: + - name: Get OFED module repo variables + ansible.builtin.set_fact: + doca_modules_repo_name: "{{ stackhpc_pulp_rpm_repos | selectattr('name', 'search', 'OFED') | map(attribute='name') | join('') }}" + doca_modules_repo_base_path: "{{ stackhpc_pulp_rpm_repos | selectattr('name', 'search', 'OFED') | map(attribute='base_path') | join('') }}" + doca_modules_repo_distribution_name: "{{ stackhpc_pulp_rpm_repos | selectattr('name', 'search', 'OFED') | map(attribute='distribution_name') | join('') }}" + - name: Install python dependencies ansible.builtin.pip: name: pulp-cli - - name: Create Pulp repository for OFED + - name: Ensure Pulp configuration directory exists + ansible.builtin.file: + path: /home/stack/.config/pulp/ + state: directory + recurse: true + + - 
name: Setup Pulp credentials + ansible.builtin.blockinfile: + path: /home/stack/.config/pulp/cli.toml + create: true + block: | + [cli] + base_url = '{{ stackhpc_release_pulp_url }}' + verify_ssl = true + format = "json" + username = '{{ stackhpc_release_pulp_username }}' + password = '{{ stackhpc_release_pulp_password }}' + no_log: true + + - name: Find DOCA kernel repo package + ansible.builtin.shell: + cmd: 'find /tmp/DOCA.* -name doca-kernel-repo-*' + register: doca_kernel_repo + changed_when: false + + - name: Create Pulp repository for DOCA kernel modules pulp.squeezer.rpm_repository: pulp_url: "{{ stackhpc_release_pulp_url }}" username: "{{ stackhpc_release_pulp_username }}" password: "{{ stackhpc_release_pulp_password }}" - name: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.name }}" + name: "{{ doca_modules_repo_name }}" state: present retries: "{{ pulp_timeout_retries | default(3) }}" - - name: Lookup Pulp RPMs on builder - ansible.builtin.find: - paths: /home/cloud-user/ofed - register: rpm_dir - - name: Upload OFED RPMs to Pulp ansible.builtin.shell: cmd: | - pulp \ - --base-url '{{ stackhpc_release_pulp_url }}' \ - --username '{{ stackhpc_release_pulp_username }}' \ - --password '{{ stackhpc_release_pulp_password }}' \ + {{ venv }}/bin/pulp \ rpm content \ --type package upload \ - --repository '{{ stackhpc_pulp_repo_doca_ofed_rhel9.name }}' \ - --file {{ item.path }} \ - with_items: "{{ rpm_dir.files }}" - no_log: true + --repository '{{ doca_modules_repo_name }}' \ + --file {{ item }} \ + --no-publish \ + with_items: "{{ doca_kernel_repo.stdout_lines }}" - name: Create Pulp publication for OFED pulp.squeezer.rpm_publication: pulp_url: "{{ stackhpc_release_pulp_url }}" username: "{{ stackhpc_release_pulp_username }}" password: "{{ stackhpc_release_pulp_password }}" - repository: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.name }}" + repository: "{{ doca_modules_repo_name }}" state: present register: publication @@ -48,7 +74,8 @@ pulp_url: "{{ 
stackhpc_release_pulp_url }}" username: "{{ stackhpc_release_pulp_username }}" password: "{{ stackhpc_release_pulp_password }}" - name: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.distribution_name }}" + name: "{{ doca_modules_repo_distribution_name + ofed_tag }}" publication: "{{ publication.publication.pulp_href }}" - base_path: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.base_path }}" + content_guard: development + base_path: "{{ doca_modules_repo_base_path + ofed_tag }}" state: present diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index 916ffe5a1..c337db281 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -47,7 +47,28 @@ dnf_custom_repos: "{{ stackhpc_dnf_repos if stackhpc_repos_enabled | bool else [ # To use these repos, set stackhpc_repos_enabled to true. # This is done by default for hosts in the overcloud group via a group_vars # file. -stackhpc_dnf_repos: "{{ dnf_custom_repos_el9 | combine(dnf_custom_repos_rocky_9) | combine(dnf_custom_repos_elrepo_9 if dnf_install_elrepo_9 | bool else {}) }}" +stackhpc_dnf_repos: "{{ dnf_custom_repos_el9 | combine(dnf_custom_repos_rocky_9) | combine(dnf_custom_repos_elrepo_9 if dnf_install_elrepo_9 | bool else {}) | combine(dnf_custom_repos_doca if dnf_install_doca | bool else {}) }}" + +# DOCA repositories +dnf_custom_repos_doca: + doca: + baseurl: "{{ stackhpc_repo_rhel9_doca_url }}" + description: "DOCA Online Repo {{ stackhpc_pulp_doca_version }} - RHEL $releasever" + enabled: "{{ dnf_enable_doca | bool | default(false) }}" + priority: -1 + file: doca + gpgcheck: no + username: "{{ stackhpc_repo_mirror_username | default(omit, true) }}" + password: "{{ stackhpc_repo_mirror_password | default(omit, true) }}" + doca-modules: + baseurl: "{{ stackhpc_repo_rhel9_doca_modules_url }}" + description: "OFED Kernel module repository for DOCA {{ stackhpc_pulp_doca_version }} - RHEL $releasever" + enabled: "{{ dnf_enable_doca_modules | bool | default(false) }}" + priority: -1 + file: doca + gpgcheck: no + username: "{{ 
stackhpc_repo_mirror_username | default(omit, true) }}" + password: "{{ stackhpc_repo_mirror_password | default(omit, true) }}" # Custom repositories shared between all RHEL 9 derivatives. dnf_custom_repos_el9: @@ -132,6 +153,12 @@ dnf_enable_epel: "{{ dnf_install_epel | bool }}" # Whether to enable the ELRepo repository. This affects RedHat-based, 9.x release systems only. dnf_enable_elrepo_9: "{{ dnf_install_elrepo_9 | bool }}" +# Whether to enable DOCA repositories. This affects RedHat-based systems only. +dnf_enable_doca: "{{ dnf_install_doca | bool }}" + +# Whether to enable the DOCA kernel module repository. This affects RedHat-based systems only. +dnf_enable_doca_modules: "{{ dnf_install_doca | bool }}" + # URL of EPEL GPG keys. dnf_epel_9_gpg_key_url: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9" @@ -152,6 +179,9 @@ dnf_enable_docker: true #URL of docker repo GPG key dnf_docker_gpg_key_url: "https://download.docker.com/linux/centos/gpg" +# Whether to create a repo file for DOCA. This affects RedHat-based +# systems only. +dnf_install_doca: "{{ 'mlnx' in group_names }}" ############################################################################### # DNF Automatic configuration. 
diff --git a/etc/kayobe/environments/ci-aio/stackhpc-ci.yml b/etc/kayobe/environments/ci-aio/stackhpc-ci.yml index 50af5d160..a3f2bbb14 100644 --- a/etc/kayobe/environments/ci-aio/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-aio/stackhpc-ci.yml @@ -50,6 +50,8 @@ stackhpc_repo_rocky_9_extras_version: "{{ stackhpc_pulp_repo_rocky_9_extras_vers stackhpc_repo_rocky_9_crb_version: "{{ stackhpc_pulp_repo_rocky_9_crb_version }}" stackhpc_repo_rocky_9_highavailability_version: "{{ stackhpc_pulp_repo_rocky_9_highavailability_version }}" stackhpc_repo_rocky_9_sig_security_common_version: "{{ stackhpc_pulp_repo_rocky_9_sig_security_common_version }}" +stackhpc_repo_rhel9_doca_version: "{{ stackhpc_pulp_repo_rhel9_doca_version }}" +stackhpc_repo_rhel9_doca_modules_version: "{{ stackhpc_pulp_repo_rhel9_doca_modules_version }}" # Rocky-and-CI-specific Pulp urls stackhpc_include_os_minor_version_in_repo_url: true diff --git a/etc/kayobe/environments/ci-builder/inventory/hosts b/etc/kayobe/environments/ci-builder/inventory/hosts index e2b89f71b..a272c6b7f 100644 --- a/etc/kayobe/environments/ci-builder/inventory/hosts +++ b/etc/kayobe/environments/ci-builder/inventory/hosts @@ -1,8 +1,5 @@ # A 'seed' host used for building images. 
-[ofed-builder:children] -seed - # This is neccesary to run `overcloud deployment image build` [controllers] builder diff --git a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml index e3112029f..3f6f3ee95 100644 --- a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml @@ -75,6 +75,7 @@ stackhpc_repo_rocky_9_extras_version: "{{ stackhpc_pulp_repo_rocky_9_extras_vers stackhpc_repo_rocky_9_crb_version: "{{ stackhpc_pulp_repo_rocky_9_crb_version }}" stackhpc_repo_rocky_9_highavailability_version: "{{ stackhpc_pulp_repo_rocky_9_highavailability_version }}" stackhpc_repo_rocky_9_sig_security_common_version: "{{ stackhpc_pulp_repo_rocky_9_sig_security_common_version }}" +stackhpc_repo_rhel9_doca_version: "{{ stackhpc_pulp_repo_rhel9_doca_version }}" # Rocky-and-CI-specific Pulp urls stackhpc_include_os_minor_version_in_repo_url: true diff --git a/etc/kayobe/environments/ci-doca-builder/.kayobe-environment b/etc/kayobe/environments/ci-doca-builder/.kayobe-environment new file mode 100644 index 000000000..0cedd00c8 --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/.kayobe-environment @@ -0,0 +1,4 @@ +--- + +dependencies: + - ci-builder diff --git a/etc/kayobe/environments/ci-doca-builder/controllers.yml b/etc/kayobe/environments/ci-doca-builder/controllers.yml new file mode 100644 index 000000000..f0dfd79a3 --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/controllers.yml @@ -0,0 +1,15 @@ +--- +############################################################################### +# Controller node configuration. + +# User with which to access the controllers via SSH during bootstrap, in order +# to setup the Kayobe user account. Default is {{ os_distribution }}. +controller_bootstrap_user: cloud-user + +############################################################################### +# Controller node LVM configuration. 
+ +# List of controller volume groups. See mrlesmithjr.manage_lvm role for +# format. +controller_lvm_groups: + - "{{ stackhpc_lvm_group_rootvg }}" diff --git a/etc/kayobe/environments/ci-builder/inventory/group_vars/seed/lvm.yml b/etc/kayobe/environments/ci-doca-builder/inventory/group_vars/ofed-builder/lvm.yml similarity index 96% rename from etc/kayobe/environments/ci-builder/inventory/group_vars/seed/lvm.yml rename to etc/kayobe/environments/ci-doca-builder/inventory/group_vars/ofed-builder/lvm.yml index 2c9a52142..ab553dfd3 100644 --- a/etc/kayobe/environments/ci-builder/inventory/group_vars/seed/lvm.yml +++ b/etc/kayobe/environments/ci-doca-builder/inventory/group_vars/ofed-builder/lvm.yml @@ -1,4 +1,5 @@ --- + # List of extra LVs to include in the rootvg VG. stackhpc_lvm_group_rootvg_lvs_extra: - "{{ stackhpc_lvm_lv_docker }}" @@ -16,7 +17,7 @@ stackhpc_lvm_lv_root_size: 2g stackhpc_lvm_lv_tmp_size: 2g # StackHPC LVM lv_var LV size. -stackhpc_lvm_lv_var_size: 2g +stackhpc_lvm_lv_var_size: 3g # StackHPC LVM lv_var_tmp LV size. stackhpc_lvm_lv_var_tmp_size: 2g diff --git a/etc/kayobe/environments/ci-doca-builder/inventory/hosts b/etc/kayobe/environments/ci-doca-builder/inventory/hosts new file mode 100644 index 000000000..c0b6fd7cb --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/inventory/hosts @@ -0,0 +1,3 @@ +[ofed-builder:children] +seed +controllers diff --git a/etc/kayobe/environments/ci-doca-builder/seed.yml b/etc/kayobe/environments/ci-doca-builder/seed.yml new file mode 100644 index 000000000..ecba7d050 --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/seed.yml @@ -0,0 +1,15 @@ +--- +############################################################################### +# Seed node configuration. + +# User with which to access the seed via SSH during bootstrap, in order +# to setup the Kayobe user account. Default is {{ os_distribution }}. 
+seed_bootstrap_user: cloud-user + +############################################################################### +# Seed node LVM configuration. + +# Seed lvm configuration. See inventory/group_vars/seed/lvm.yml +# for the exact configuration. +seed_lvm_groups: + - "{{ stackhpc_lvm_group_rootvg }}" diff --git a/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml new file mode 100644 index 000000000..a6e66db54 --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml @@ -0,0 +1,9 @@ +--- +############################################################################### +# StackHPC configuration. + +# Enable StackHPC Ark repositories for DOCA builds +stackhpc_repos_enabled: true +enable_docker_repo: false +dnf_install_doca: true +dnf_enable_doca_modules: false diff --git a/etc/kayobe/inventory/groups b/etc/kayobe/inventory/groups index 1028c2ca9..3a9739873 100644 --- a/etc/kayobe/inventory/groups +++ b/etc/kayobe/inventory/groups @@ -76,6 +76,9 @@ compute-vgpu [iommu:children] vgpu +[mlnx] +# Empty group to provide declaration of mlnx group. + ############################################################################### # Service groups. diff --git a/etc/kayobe/ofed.yml b/etc/kayobe/ofed.yml index 696e3c93b..3ca9201fb 100644 --- a/etc/kayobe/ofed.yml +++ b/etc/kayobe/ofed.yml @@ -1,12 +1,21 @@ --- -# DOCA OFED configuration +# DOCA OFED configuration. 
-# DOCA OFED version -stackhpc_pulp_doca_ofed_version: 24.07 +############################################################################### +# DOCA host version +stackhpc_pulp_doca_version: 2.9.1 -# DOCA OFED repositories -stackhpc_pulp_repo_doca_ofed_rhel9: - name: Mellanox Technologies doca_ofed {{ stackhpc_pulp_doca_ofed_version }} Rocky 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} - url: "{{ stackhpc_release_pulp_content_url }}/doca_ofed/{{ stackhpc_pulp_doca_ofed_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64" - distribution_name: "mlnx-ofed-{{ stackhpc_pulp_doca_ofed_version }}-rocky-9-{{ stackhpc_pulp_repo_rocky_9_minor_version }}" - base_path: "mlnx_ofed/{{ stackhpc_pulp_doca_ofed_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64" +############################################################################### +# Pulp configuration for DOCA OFED + +# Whether to sync OFED repositories into the local Pulp service +stackhpc_pulp_sync_ofed: "{{ groups['mlnx'] | length > 0 }}" + +# DOCA Snapshot versions. The defaults use the appropriate version from +# pulp-repo-versions.yml +stackhpc_pulp_repo_rhel9_doca_version: "{{ lookup('vars', 'stackhpc_pulp_repo_rhel_9_{{ stackhpc_pulp_repo_rocky_9_minor_version }}_doca_version') }}" +stackhpc_pulp_repo_rhel9_doca_modules_version: "{{ lookup('vars', 'stackhpc_pulp_repo_rhel_9_{{ stackhpc_pulp_repo_rocky_9_minor_version }}_doca_modules_version') }}" + +############################################################################### +# Dummy variable to allow Ansible to accept this file. 
+workaround_ansible_issue_8743: yes diff --git a/etc/kayobe/pulp-repo-versions.yml b/etc/kayobe/pulp-repo-versions.yml index 672980bfb..b9a27f494 100644 --- a/etc/kayobe/pulp-repo-versions.yml +++ b/etc/kayobe/pulp-repo-versions.yml @@ -49,3 +49,7 @@ stackhpc_pulp_repo_rocky_9_sig_security_common_version: 20241127T003858 stackhpc_pulp_repo_ubuntu_cloud_archive_version: 20241217T045049 stackhpc_pulp_repo_ubuntu_jammy_security_version: 20241217T071258 stackhpc_pulp_repo_ubuntu_jammy_version: 20241217T071258 +stackhpc_pulp_repo_rhel_9_4_doca_version: 20241211T153620 +stackhpc_pulp_repo_rhel_9_4_doca_modules_version: 20241213T112245 +stackhpc_pulp_repo_rhel_9_5_doca_version: 20241211T171301 +stackhpc_pulp_repo_rhel_9_5_doca_modules_version: 20250115T150314 diff --git a/etc/kayobe/pulp.yml b/etc/kayobe/pulp.yml index e55b82bb3..1b7868ff6 100644 --- a/etc/kayobe/pulp.yml +++ b/etc/kayobe/pulp.yml @@ -373,6 +373,18 @@ stackhpc_pulp_rpm_repos: base_path: "opensearch-dashboards/2.x/yum/" required: "{{ stackhpc_pulp_sync_for_local_container_build | bool and stackhpc_pulp_sync_el_9 | bool }}" + - name: DOCA Online Repo {{ stackhpc_pulp_doca_version }} - RHEL 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} + url: "{{ stackhpc_release_pulp_content_url }}/doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_pulp_repo_rhel9_doca_version }}" + distribution_name: "doca-{{ stackhpc_pulp_doca_version }}-rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}-" + base_path: "doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/" + required: "{{ stackhpc_pulp_sync_ofed | bool and stackhpc_pulp_sync_el_9 | bool }}" + + - name: OFED Kernel modules for DOCA {{ stackhpc_pulp_doca_version }} - RHEL 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} + url: "{{ stackhpc_release_pulp_content_url }}/doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version 
}}/x86_64/{{ stackhpc_pulp_repo_rhel9_doca_modules_version }}" + distribution_name: "doca-modules-{{ stackhpc_pulp_doca_version }}-rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}-" + base_path: "doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/" + required: "{{ stackhpc_pulp_sync_ofed | bool and stackhpc_pulp_sync_el_9 | bool }}" + # RPM repositories stackhpc_pulp_repository_rpm_repos: >- {%- set rpm_repos = [] -%} diff --git a/etc/kayobe/stackhpc.yml b/etc/kayobe/stackhpc.yml index b1955c02b..db64a8b8c 100644 --- a/etc/kayobe/stackhpc.yml +++ b/etc/kayobe/stackhpc.yml @@ -56,6 +56,14 @@ stackhpc_repo_ceph_reef_debian_version: "{{ stackhpc_repo_distribution }}" ############################################################################### # RPMs +# DOCA +stackhpc_repo_rhel9_doca_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_repo_rhel9_doca_version }}" +stackhpc_repo_rhel9_doca_version: "{{ stackhpc_repo_distribution }}" + +# DOCA Modules +stackhpc_repo_rhel9_doca_modules_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_repo_rhel9_doca_modules_version }}" +stackhpc_repo_rhel9_doca_modules_version: "{{ stackhpc_repo_distribution }}" + # Grafana stackhpc_repo_grafana_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/grafana/oss/rpm/{{ stackhpc_repo_grafana_version }}" stackhpc_repo_grafana_version: "{{ stackhpc_repo_distribution }}" diff --git a/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml b/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml new file mode 100644 index 000000000..8b023c4e9 --- /dev/null +++ b/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml @@ -0,0 +1,16 @@ +--- +features: + - | + Using DOCA LTS 2.9.1. 
+ - | + DOCA workflows updated to build kernel modules only, relying + on Release Train synchronisation of DOCA upstream for userspace + packages. + - | + Improved documentation now including DOCA install playbook. + - | + Local Pulp syncs for DOCA/DOCA kernel module repository. +fixes: + - | + Miscellaneous issues with the package-build-ofed workflow + are resolved in this patchset.