stackhpc
diff --git a/‎.automation‎ b/‎.automation‎
diff --git a/‎.github/workflows/multinode-inputs.py‎
Lines changed: 80 additions & 0 deletions b/‎.github/workflows/multinode-inputs.py‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎.github/workflows/stackhpc-multinode-periodic.yml‎
Lines changed: 50 additions & 0 deletions b/‎.github/workflows/stackhpc-multinode-periodic.yml‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎.github/workflows/stackhpc-multinode.yml‎
Lines changed: 71 additions & 0 deletions b/‎.github/workflows/stackhpc-multinode.yml‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎doc/source/_static/images/release-train.svg‎
Lines changed: 1 addition & 1 deletion b/‎doc/source/_static/images/release-train.svg‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/configuration/cephadm.rst‎
Lines changed: 16 additions & 1 deletion b/‎doc/source/configuration/cephadm.rst‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎doc/source/configuration/magnum-capi.rst‎
Lines changed: 4 additions & 9 deletions b/‎doc/source/configuration/magnum-capi.rst‎
Lines changed: 4 additions & 9 deletions
diff --git a/‎doc/source/configuration/monitoring.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/source/configuration/monitoring.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/configuration/wazuh.rst‎
Lines changed: 0 additions & 2 deletions b/‎doc/source/configuration/wazuh.rst‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎doc/source/operations/upgrading-ceph.rst‎
Lines changed: 4 additions & 3 deletions b/‎doc/source/operations/upgrading-ceph.rst‎
Lines changed: 4 additions & 3 deletions
@@ -0,0 +1,80 @@
+# Generate inputs for the reusable multinode.yml workflow.
+# The test scenario is randomly selected.
+# The inputs are printed to stdout in GitHub step output key=value format.
+
+from dataclasses import dataclass
+import random
+import typing as t
+
+
+@dataclass
+class OSRelease:
+    """A host operating system release that a test scenario can select."""
+
+    # Distribution name, e.g. "rocky" or "ubuntu"; emitted as ``os_distribution``.
+    distribution: str
+    # Release identifier, e.g. "9" or "jammy"; emitted as ``os_release``.
+    release: str
+    # Login user, e.g. "cloud-user" or "ubuntu"; emitted as ``ssh_username``.
+    ssh_username: str
+
+
+@dataclass
+class OpenStackRelease:
+    """An OpenStack release and the host OS releases it can be paired with."""
+
+    # Release name, e.g. "2023.1"; get_branch() maps it to "stackhpc/<version>".
+    version: str
+    # Name of the previous release, e.g. "zed"; mapped to a branch the same way.
+    previous_version: str
+    # Candidate host OS releases; random_scenario() picks one of these.
+    os_releases: t.List[OSRelease]
+
+
+@dataclass
+class Scenario:
+    """One combination of test parameters, as built by random_scenario()."""
+
+    # OpenStack release under test (supplies the current and previous versions).
+    openstack_release: OpenStackRelease
+    # Host OS release chosen from ``openstack_release.os_releases``.
+    os_release: OSRelease
+    # Neutron plugin name, one of NEUTRON_PLUGINS.
+    neutron_plugin: str
+    # Upgrade flag; emitted as the lower-case string "true" or "false".
+    upgrade: bool
+
+
+# Host OS releases that scenarios can select:
+# (distribution, release, SSH username).
+ROCKY_9 = OSRelease("rocky", "9", "cloud-user")
+UBUNTU_JAMMY = OSRelease("ubuntu", "jammy", "ubuntu")
+# NOTE(upgrade): Add supported releases here.
+# Each entry is (version, previous version, host OS releases to choose from).
+OPENSTACK_RELEASES = [
+    OpenStackRelease("2023.1", "zed", [ROCKY_9, UBUNTU_JAMMY])
+]
+# Values random_scenario() chooses between for the ``neutron_plugin`` input.
+NEUTRON_PLUGINS = ["ovs", "ovn"]
+
+
+def main() -> None:
+    """Pick a random scenario and print each workflow input to stdout."""
+    outputs = generate_inputs(random_scenario())
+    for key in outputs:
+        write_output(key, outputs[key])
+
+
+def random_scenario() -> Scenario:
+    """Build a Scenario by drawing each parameter at random.
+
+    The host OS release is drawn from the releases listed for the chosen
+    OpenStack release.
+    """
+    release = random.choice(OPENSTACK_RELEASES)
+    host_os = random.choice(release.os_releases)
+    plugin = random.choice(NEUTRON_PLUGINS)
+    # An upgrade is selected only when the draw exceeds 0.6.
+    do_upgrade = random.random() > 0.6
+    return Scenario(
+        openstack_release=release,
+        os_release=host_os,
+        neutron_plugin=plugin,
+        upgrade=do_upgrade,
+    )
+
+
+def generate_inputs(scenario: Scenario) -> t.Dict[str, str]:
+    """Translate a Scenario into a mapping of workflow input names to values.
+
+    All values are strings; the upgrade flag becomes "true" or "false".
+    """
+    release = scenario.openstack_release
+    host_os = scenario.os_release
+    current_branch = get_branch(release.version)
+    return {
+        "os_distribution": host_os.distribution,
+        "os_release": host_os.release,
+        "ssh_username": host_os.ssh_username,
+        "neutron_plugin": scenario.neutron_plugin,
+        "upgrade": str(scenario.upgrade).lower(),
+        "stackhpc_kayobe_config_version": current_branch,
+        "stackhpc_kayobe_config_previous_version": get_branch(
+            release.previous_version
+        ),
+    }
+
+
+def get_branch(version: str) -> str:
+    """Return the branch name for a release, i.e. ``stackhpc/<version>``."""
+    return "stackhpc/{}".format(version)
+
+
+def write_output(name: str, value: str) -> None:
+    """Print a single ``name=value`` line to stdout."""
+    line = "{}={}".format(name, value)
+    print(line)
+
+
+# Run the generator only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,50 @@
+---
+# This workflow provides a periodic deploy of a multi-node test cluster.
+# The test scenario is randomly selected.
+
+name: Multinode periodic
+'on':
+  schedule:
+    # Runs nightly at 02:42 UTC (GitHub Actions cron schedules are in UTC).
+    - cron: "42 2 * * *"
+jobs:
+  generate-inputs:
+    name: Generate inputs
+    runs-on: ubuntu-latest
+    outputs:
+      os_distribution: ${{ steps.generate-inputs.outputs.os_distribution }}
+      os_release: ${{ steps.generate-inputs.outputs.os_release }}
+      ssh_username: ${{ steps.generate-inputs.outputs.ssh_username }}
+      neutron_plugin: ${{ steps.generate-inputs.outputs.neutron_plugin }}
+      upgrade: ${{ steps.generate-inputs.outputs.upgrade }}
+      stackhpc_kayobe_config_version: ${{ steps.generate-inputs.outputs.stackhpc_kayobe_config_version }}
+      stackhpc_kayobe_config_previous_version: ${{ steps.generate-inputs.outputs.stackhpc_kayobe_config_previous_version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Generate inputs for multinode workflow
+        id: generate-inputs
+        run: |
+          python3 .github/workflows/multinode-inputs.py >> $GITHUB_OUTPUT
+
+      - name: Display generated inputs
+        run: |
+          echo '${{ toJSON(steps.generate-inputs.outputs) }}'
+  multinode:
+    name: Multinode periodic
+    needs:
+      - generate-inputs
+    uses: stackhpc/stackhpc-openstack-gh-workflows/.github/workflows/[email protected]
+    with:
+      multinode_name: mn-prdc-${{ github.run_id }}
+      os_distribution: ${{ needs.generate-inputs.outputs.os_distribution }}
+      os_release: ${{ needs.generate-inputs.outputs.os_release }}
+      ssh_username: ${{ needs.generate-inputs.outputs.ssh_username }}
+      neutron_plugin: ${{ needs.generate-inputs.outputs.neutron_plugin }}
+      upgrade: ${{ needs.generate-inputs.outputs.upgrade == 'true' }}
+      stackhpc_kayobe_config_version: ${{ needs.generate-inputs.outputs.stackhpc_kayobe_config_version }}
+      stackhpc_kayobe_config_previous_version: ${{ needs.generate-inputs.outputs.stackhpc_kayobe_config_previous_version }}
+      enable_slack_alert: true
+    secrets: inherit
+    if: github.repository == 'stackhpc/stackhpc-kayobe-config'
@@ -0,0 +1,71 @@
+---
+# This workflow provides a workflow_dispatch (manual) trigger to deploy a
+# multi-node test cluster.
+
+name: Multinode
+'on':
+  workflow_dispatch:
+    # NOTE: workflow_dispatch is limited to 10 inputs.
+    inputs:
+      multinode_name:
+        description: Multinode cluster name
+        type: string
+        required: true
+      os_distribution:
+        description: Host OS distribution
+        type: choice
+        default: rocky
+        options:
+          - rocky
+          - ubuntu
+      neutron_plugin:
+        description: Neutron ML2 plugin
+        type: choice
+        default: ovn
+        options:
+          - ovn
+          - ovs
+      upgrade:
+        description: Whether to perform an upgrade
+        type: boolean
+        default: false
+      break_on:
+        description: When to break execution for manual interaction
+        type: choice
+        default: never
+        options:
+          - always
+          - failure
+          - never
+          - success
+      break_duration:
+        description: How long to break execution for (minutes)
+        type: number
+        default: 60
+      ssh_key:
+        description: SSH public key to authorise on Ansible control host
+        type: string
+      terraform_kayobe_multinode_version:
+        description: terraform-kayobe-multinode version
+        type: string
+        default: main
+jobs:
+  multinode:
+    name: Multinode
+    uses: stackhpc/stackhpc-openstack-gh-workflows/.github/workflows/[email protected]
+    with:
+      multinode_name: ${{ inputs.multinode_name }}
+      os_distribution: ${{ inputs.os_distribution }}
+      os_release: ${{ inputs.os_distribution == 'rocky' && '9' || 'jammy' }}
+      ssh_username: ${{ inputs.os_distribution == 'rocky' && 'cloud-user' || 'ubuntu' }}
+      neutron_plugin: ${{ inputs.neutron_plugin }}
+      upgrade: ${{ inputs.upgrade }}
+      break_on: ${{ inputs.break_on }}
+      # Workaround loss of number type using fromJSON: https://github.com/orgs/community/discussions/67182
+      break_duration: ${{ fromJSON(inputs.break_duration) }}
+      ssh_key: ${{ inputs.ssh_key }}
+      stackhpc_kayobe_config_version: ${{ github.ref_name }}
+      # NOTE(upgrade): Reference the PREVIOUS release here.
+      stackhpc_kayobe_config_previous_version: stackhpc/zed
+      terraform_kayobe_multinode_version: ${{ inputs.terraform_kayobe_multinode_version }}
+    secrets: inherit
@@ -347,6 +347,10 @@ should be used in the Kolla Manila configuration e.g.:
 RADOS Gateways
 --------------
 
+RADOS Gateway integration is described in the :kolla-ansible-doc:`Kolla Ansible
+documentation
+<reference/storage/external-ceph-guide.html#radosgw>`.
+
 RADOS Gateways (RGWs) are defined with the following:
 
 .. code:: yaml
@@ -377,7 +381,7 @@ The set of commands below configure all of these.
   - "config set client.rgw rgw_enable_apis 's3, swift, swift_auth, admin'"
   - "config set client.rgw rgw_enforce_swift_acls true"
   - "config set client.rgw rgw_keystone_accepted_admin_roles 'admin'"
-  - "config set client.rgw rgw_keystone_accepted_roles 'member, Member, _member_, admin'"
+  - "config set client.rgw rgw_keystone_accepted_roles 'member, admin'"
   - "config set client.rgw rgw_keystone_admin_domain Default"
   - "config set client.rgw rgw_keystone_admin_password {{ secrets_ceph_rgw_keystone_password }}"
   - "config set client.rgw rgw_keystone_admin_project service"
@@ -393,6 +397,12 @@ The set of commands below configure all of these.
   - "config set client.rgw rgw_swift_account_in_url true"
   - "config set client.rgw rgw_swift_versioning_enabled true"
 
+Enable the Kolla Ansible RADOS Gateway integration in ``kolla.yml``:
+
+.. code:: yaml
+
+   kolla_enable_ceph_rgw: true
+
 As we have configured Ceph to respond to Swift APIs, you will need to tell
 Kolla to account for this when registering Swift endpoints with Keystone. Also,
 when ``rgw_swift_account_in_url`` is set, the equivalent Kolla variable should
@@ -414,6 +424,11 @@ before deploying the RADOS gateways. If you are using the Kolla load balancer
 
   kayobe overcloud service deploy -kt ceph-rgw,keystone,haproxy,loadbalancer
 
+There are two options for load balancing RADOS Gateway:
+
+1. HA with Ceph Ingress services
+2. RGWs with hyper-converged Ceph (using the Kolla Ansible deployed HAProxy
+   load balancer)
 
 .. _RGWs-with-hyper-converged-Ceph:
 
 
@@ -60,12 +60,12 @@ To deploy the CAPI management cluster using this site-specific environment, run
 
 .. code-block:: bash
 
-    # Activate the environment
-    ./bin/activate <site-specific-name>
-
     # Install or update the local Ansible Python venv
     ./bin/ensure-venv
 
+    # Activate the environment
+    source bin/activate <site-specific-name>
+
     # Install or update Ansible dependencies
     ansible-galaxy install -f -r ./requirements.yml
 
@@ -103,12 +103,7 @@ To configure the Magnum service with the Cluster API driver enabled, first ensur
 
 Next, copy the CAPI management cluster's kubeconfig file into your stackhpc-kayobe-config environment (e.g. ``<your-skc-environment>/kolla/config/magnum/kubeconfig``). This file must be Ansible vault encrypted.
 
-The following config should also be set in your stackhpc-kayobe-config environment:
-
-.. code-block:: yaml
-    :caption: kolla/globals.yml
-
-    magnum_capi_helm_driver_enabled: true
+The presence of a kubeconfig file in the Magnum config directory is used by Kolla to determine whether the CAPI Helm driver should be enabled.
 
 To apply the configuration, run ``kayobe overcloud service reconfigure -kt magnum``.
 
 
@@ -74,7 +74,7 @@ on the overcloud hosts:
 .. code-block:: console
 
     (kayobe) [stack@node ~]$ cd etc/kayobe
-    (kayobe) [stack@node kayobe]$ kayobe playbook run ansible/smartmontools.yml
+    (kayobe) [stack@node kayobe]$ kayobe playbook run ansible/smartmon-tools.yml
 
 SMART reporting should now be enabled along with a Prometheus alert for
 unhealthy disks and a Grafana dashboard called ``Hardware Overview``.
 
@@ -12,7 +12,6 @@ The short version
    particular the defaults assume that the ``provision_oc_net`` network will be
    used.
 #. Generate secrets: ``kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-secrets.yml``
-#. Encrypt the secrets: ``ansible-vault encrypt --vault-password-file ~/vault.password  $KAYOBE_CONFIG_PATH/environments/ci-multinode/wazuh-secrets.yml``
 #. Deploy the Wazuh manager: ``kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-manager.yml``
 #. Deploy the Wazuh agents: ``kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml``
 
@@ -250,7 +249,6 @@ It will be used by wazuh secrets playbook to generate wazuh secrets vault file.
 .. code-block:: console
 
   kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-secrets.yml
-  ansible-vault encrypt --vault-password-file ~/vault.pass $KAYOBE_CONFIG_PATH/wazuh-secrets.yml
 
 Configure Wazuh Dashboard's Server Host
 ---------------------------------------
 
@@ -63,7 +63,7 @@ Place the host or batch of hosts into maintenance mode:
 
 .. code-block:: console
 
-   sudo cephadm shell -- ceph orch host maintenance enter <host>
+   kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ceph-enter-maintenance.yml -l <host>
 
 To update all eligible packages, use ``*``, escaping if necessary:
 
@@ -72,7 +72,8 @@ To update all eligible packages, use ``*``, escaping if necessary:
    kayobe overcloud host package update --packages "*" --limit <host>
 
 If the kernel has been upgraded, reboot the host or batch of hosts to pick up
-the change:
+the change. When running the playbook below, consider setting
+``ANSIBLE_SERIAL`` to the maximum number of hosts that can safely reboot
+concurrently:
 
 .. code-block:: console
 
@@ -82,7 +83,7 @@ Remove the host or batch of hosts from maintenance mode:
 
 .. code-block:: console
 
-   sudo cephadm shell -- ceph orch host maintenance exit <host>
+   kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ceph-exit-maintenance.yml -l <host>
 
 Wait for Ceph health to return to ``HEALTH_OK``: