Skip to content

Commit 0641dd5

Browse files
committed
Add the steps to reboot the computes after update.
This sequence implements the reboot of the compute nodes after the update. If one or more instances have been created, they are live-migrated to other hypervisors before the reboot and migrated back to the original hypervisor after the reboot. Some basic sanity checks are performed after the reboot and before the migration back to ensure that the necessary services are up and running. Closes: https://issues.redhat.com/browse/OSPRH-8937
1 parent 79a60e4 commit 0641dd5

File tree

4 files changed

+135
-0
lines changed

4 files changed

+135
-0
lines changed

roles/update/tasks/main.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,6 @@
7474
- not cifmw_update_run_dryrun | bool
7575
ansible.builtin.shell: |
7676
{{ cifmw_update_artifacts_basedir }}/control_plane_test_stop.sh
77+
78+
# Pull in the compute reboot tasks from reboot_compute.yml.
- name: Reboot the compute nodes
  ansible.builtin.include_tasks:
    file: reboot_compute.yml
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
---
# Command prefixes reused by the following tasks. Both go through `oc rsh`
# into `openstackclient` in the `openstack` namespace.
- name: Define command for OpenStack client interactions
  ansible.builtin.set_fact:
    # Prefix for running a single `openstack` CLI subcommand.
    openstack_cmd: "oc rsh -n openstack openstackclient openstack"
    # Prefix for running an arbitrary command string through `bash -c`.
    bash_cmd: "oc rsh -n openstack openstackclient bash -c"
5+
6+
# Record the Host field of every cinder-volume service. The migration tasks
# look for the substring 'ceph' in this output to decide whether
# `--block-migrate` is needed.
# NOTE(review): no `set -o pipefail` on purpose. If the query fails, jq sees
# empty input, stdout stays empty and the migration tasks fall back to
# `--block-migrate`. Confirm that this fallback is the desired behaviour.
- name: Register storage backend type
  ansible.builtin.shell: >-
    {{ openstack_cmd }} volume service list -f json |
    jq -r -c '.[] | select(.Binary | contains("cinder-volume")) | .Host'
  register: storage_backend
  # Read-only query: never report a change.
  changed_when: false
11+
12+
# Capture the raw JSON hypervisor listing; it is parsed by a later task.
- name: Get list of OpenStack hypervisors
  ansible.builtin.shell:
    cmd: |
      {{ openstack_cmd }} hypervisor list -f json
  changed_when: false
  register: hypervisor_list
17+
18+
# Turn the JSON captured in hypervisor_list into a plain list containing the
# 'Hypervisor Hostname' field of each entry.
- name: Parse the hypervisor list to extract hostnames
  ansible.builtin.set_fact:
    hypervisor_hostnames: >-
      {{
        hypervisor_list.stdout
        | from_json
        | map(attribute='Hypervisor Hostname')
        | list
      }}
21+
22+
# Run reboot_hypervisor.yml once per hostname; the current hostname is
# exposed to the included tasks as `hypervisor`.
- name: Iterate over each hypervisor
  ansible.builtin.include_tasks:
    file: reboot_hypervisor.yml
  loop_control:
    loop_var: hypervisor
  loop: "{{ hypervisor_hostnames }}"
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
---
2+
# Keep only the part of `hypervisor` before the first dot.
- name: Extract short hostname from FQDN
  ansible.builtin.set_fact:
    hypervisor_short_name: "{{ hypervisor.split('.') | first }}"
5+
6+
# Progress message so the play output shows which host is being handled.
- name: Announce the hypervisor about to be rebooted
  ansible.builtin.debug:
    msg: "Rebooting {{ hypervisor_short_name }}"
8+
9+
# List the IDs of the servers on this hypervisor whose Status contains
# ACTIVE or PAUSED, one ID per line. An empty stdout makes the evacuation and
# quiesce tasks skip themselves, so a failure of the `openstack` call must not
# be mistaken for "no VMs". `set -o pipefail` makes the task fail in that case
# instead of letting jq succeed on empty input.
- name: Check active VMs on hypervisor
  ansible.builtin.shell: >-
    set -o pipefail;
    {{ openstack_cmd }} server list --all --host {{ hypervisor }} -f json
    | jq -r -c '.[] | select(.Status | contains("ACTIVE") or contains("PAUSED")) | .ID'
  register: active_vms
  # Read-only query: never report a change.
  changed_when: false
15+
16+
# Live-evacuate the hypervisor with `nova host-evacuate-live`, but only when
# the previous check found VMs on it. `--block-migrate` is added unless the
# recorded storage backend output mentions 'ceph'.
- name: Evacuate VMs if they are running
  ansible.builtin.shell:
    cmd: >-
      {{ bash_cmd }} ". cloudrc &&
      nova host-evacuate-live
      {% if 'ceph' not in storage_backend.stdout %} --block-migrate {% endif %}
      {{ hypervisor }}"
  changed_when: true
  when:
    - active_vms.stdout != ''
26+
27+
# Poll until no server on this hypervisor has a Status containing ACTIVE,
# PAUSED or MIGRATING. jq prints the number of such servers; the loop ends
# only when that number is exactly 0. A substring search for "0" would also
# accept 10, 20, 30 and so on. Up to 30 attempts, 5 seconds apart.
- name: Wait for compute node to get quiesced
  ansible.builtin.shell: >-
    set -o pipefail;
    {{ openstack_cmd }} server list --all --host {{ hypervisor }} -f json
    | jq -r -c '[.[] | select(.Status |
    contains("ACTIVE") or contains("PAUSED") or contains("MIGRATING"))]
    | length'
  register: compute_node_instances
  until: compute_node_instances.stdout | trim == '0'
  retries: 30
  delay: 5
  # Read-only query: never report a change.
  changed_when: false
  when:
    - active_vms.stdout != ''
39+
40+
# Reboot the host addressed by its short name, with privilege escalation.
# The host counts as back once `systemctl is-system-running` reports either
# "running" or "degraded".
- name: Reboot the hypervisor
  ansible.builtin.reboot:
    test_command: "systemctl is-system-running | grep -e running -e degraded"
    reboot_timeout: 1200
  become: true
  delegate_to: "{{ hypervisor_short_name }}"
46+
47+
# Run the post-reboot checks for this host; the included file reads the
# hostname from `current_hypervisor`.
- name: Perform sanity checks post-reboot
  vars:
    current_hypervisor: "{{ hypervisor }}"
  ansible.builtin.include_tasks:
    file: reboot_hypervisor_sanity_checks.yml
51+
52+
# One progress message per VM ID recorded before the reboot.
- name: Announce the VMs to migrate back
  ansible.builtin.debug:
    msg: "Migrate back {{ item }} to {{ hypervisor_short_name }}."
  loop: "{{ active_vms.stdout_lines }}"
55+
56+
# For every VM ID recorded before the reboot: request a live migration back
# to this hypervisor, then print the host the server currently reports. The
# task is retried (30 attempts, 5 seconds apart) until that host contains the
# hypervisor name. `--block-migrate` is added unless the recorded storage
# backend output mentions 'ceph'.
# NOTE(review): the `nova live-migration` request is part of the retried
# command, so it is re-issued on every attempt and its exit status is ignored
# (it is followed by `;`). Confirm that this is intended.
- name: Migrate back VMs post-reboot
  ansible.builtin.shell:
    cmd: >-
      set -o pipefail;
      {{ bash_cmd }} ". cloudrc &&
      nova live-migration
      {% if 'ceph' not in storage_backend.stdout %} --block-migrate {% endif %}
      {{ item }} {{ hypervisor }}";
      {{ openstack_cmd }} server show {{ item }} -f json |
      jq -r -c '. | .["OS-EXT-SRV-ATTR:host"]'
  register: instance_migration_result
  until: hypervisor in instance_migration_result.stdout
  retries: 30
  delay: 5
  loop: "{{ active_vms.stdout_lines }}"
  when:
    - active_vms.stdout != ''
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
---
2+
# Progress message naming the host whose post-reboot checks are starting.
- name: Announce the post-reboot sanity checks
  ansible.builtin.debug:
    msg: "Here I'm testing the reboot for {{ current_hypervisor }}."
4+
5+
# Poll the compute service list for this host until the nova-compute entry
# reports State "up". Up to 5 attempts, 30 seconds apart. `set -o pipefail`
# surfaces a failing `openstack` call as a task error rather than as empty
# output from jq.
- name: Verify nova-compute service
  ansible.builtin.shell: >-
    set -o pipefail;
    {{ openstack_cmd }} compute service list --host {{ current_hypervisor }} -f json
    | jq -r -c '.[]
    | select(.Binary | contains("nova-compute")) | .State'
  register: nova_compute_status
  until: nova_compute_status.stdout == 'up'
  retries: 5
  delay: 30
  # Read-only check: never report a change.
  changed_when: false
14+
15+
# Poll the network agent list for this host until the ovn-controller entry
# reports Alive "true". Up to 5 attempts, 30 seconds apart. `set -o pipefail`
# surfaces a failing `openstack` call as a task error rather than as empty
# output from jq.
- name: Verify ovn-controller service
  ansible.builtin.shell: >-
    set -o pipefail;
    {{ openstack_cmd }} network agent list --host {{ current_hypervisor }} -f json
    | jq -r -c '.[]
    | select(.Binary | contains("ovn-controller")) | .Alive'
  register: ovn_controller_status
  until: ovn_controller_status.stdout == 'true'
  retries: 5
  delay: 30
  # Read-only check: never report a change.
  changed_when: false
24+
25+
# Poll the network agent list for this host until the
# neutron-ovn-metadata-agent entry reports Alive "true". Up to 5 attempts,
# 30 seconds apart. `set -o pipefail` surfaces a failing `openstack` call as
# a task error rather than as empty output from jq.
- name: Verify networking-ovn-metadata-agent
  ansible.builtin.shell: >-
    set -o pipefail;
    {{ openstack_cmd }} network agent list --host {{ current_hypervisor }} -f json
    | jq -r -c '.[]
    | select(.Binary | contains("neutron-ovn-metadata-agent")) | .Alive'
  register: networking_ovn_metadata_status
  until: networking_ovn_metadata_status.stdout == 'true'
  retries: 5
  delay: 30
  # Read-only check: never report a change.
  changed_when: false

0 commit comments

Comments
 (0)