ceph_migrate: refactor the `restart mgr` handler into a task to fix role usage

rebtoor · rebtoor · commit fc7df71e7e36 · 2025-11-04T17:11:01.000+01:00
This is a follow-up to #1089.

Refactor the `restart mgr` operation: remove the handler and add an explicit task.

Previously, restarting the Ceph manager (`mgr`) relied on an Ansible handler, which deferred execution until the end of the play. This caused issues for Ceph failover, because the action needs to occur immediately after the MON host removal. Additionally, the handler restarted the manager from the controller node, whereas the correct approach is to run `cephadm` directly on a ComputeHCI node.

This update removes the handler file and replaces it with an explicit task that runs right after the MON removal block in `drain.yaml`. The task directly runs `cephadm shell -- ceph mgr fail` on a ComputeHCI node when one is available, which ensures prompt and correct manager failover.

- Deleted the now-unneeded `handlers/main.yml`.
- Removed the `notify: restart mgr` entries from `drain.yaml` and `mon.yaml`.
- Added a dedicated task to restart mgr after host removal, delegating to ComputeHCI if present.

Signed-off-by: Roberto Alfieri <ralfieri@redhat.com>
diff --git a/tests/roles/ceph_migrate/handlers/main.yml b/tests/roles/ceph_migrate/handlers/main.yml
diff --git a/tests/roles/ceph_migrate/tasks/drain.yaml b/tests/roles/ceph_migrate/tasks/drain.yaml
@@ -81,4 +81,12 @@
       when: lsh.stdout | from_json | community.general.json_query('[*].hostname') | length > 0
       ansible.builtin.command:
         "{{ ceph_cli }} orch host rm {{ cur_mon }} --force"
-      notify: restart mgr
+
+    # Fail over the mgr right after MON host removal, via cephadm on ComputeHCI
+    - name: Restart mgr on compute
+      become: true
+      ansible.builtin.command:
+        cmd: cephadm shell -- ceph mgr fail
+      changed_when: true  # explicit; same as the command module's default
+      delegate_to: "{{ groups['ComputeHCI'][0] | default(inventory_hostname) }}"
+      when: groups['ComputeHCI'] | default([]) | length > 0  # no HCI host: skip
diff --git a/tests/roles/ceph_migrate/tasks/mon.yaml b/tests/roles/ceph_migrate/tasks/mon.yaml
@@ -120,7 +120,6 @@
         CEPH_CONTAINER_IMAGE: "{{ ceph_container }}"
         CEPH_CONTAINER_BINARY: "{{ ceph_container_cli }}"
         CEPH_CONF: "{{ ceph_config_tmp_client_home }}"
-      notify: restart mgr
 
     - name: Print the resulting spec
       when: debug | default(false)
@@ -143,7 +142,6 @@
       delay: "{{ ceph_retry_delay }}"
       loop_control:
         label: "MON - Get tmp mon"
-      notify: restart mgr
 
     - name: MON - Wait for the current mon to be deleted
       ansible.builtin.pause:
@@ -157,7 +155,6 @@
     - name: MON - Redeploy mon on {{ target_node }}
       ansible.builtin.command:
         "{{ ceph_cli }} orch daemon add mon {{ target_node }}:{{ mon_ipaddr }}"
-      notify: restart mgr
 
     - name: MON - Wait for the spec to be updated
       ansible.builtin.pause:
@@ -197,7 +194,6 @@
     CEPH_CONTAINER_IMAGE: "{{ ceph_container }}"
     CEPH_CONTAINER_BINARY: "{{ ceph_container_cli }}"
     CEPH_CONF: "{{ ceph_config_tmp_client_home }}"
-  notify: restart mgr
 
 # Wait for the redeploy to finish before moving to the next stage
 - name: MON - wait daemons