Revert "fix: parallelize queue_dispatching in monitor loop #1357" (#1392)

bzwei · web-flow · commit fd3d0ce76ad2 · 2025-09-11T21:30:15.000+02:00
Reverts commit 637aa34. See https://issues.redhat.com/browse/AAP-52854. The previous commit may cause multiple failures in ATF Tier3 tests. Let's roll back this change for now and rework it with more thorough tests.
diff --git a/src/aap_eda/tasks/orchestrator.py b/src/aap_eda/tasks/orchestrator.py
@@ -176,34 +176,10 @@ def _run_request(
 
 
 def queue_dispatch(
-    process_parent_type: ProcessParentType,
-    process_parent_id: int,
-    request_type: Optional[ActivationRequest] = None,
-    request_id: str = "",
-) -> None:  # pragma: no cover
-    job_id = _manage_process_job_id(process_parent_type, process_parent_id)
-    with advisory_lock(job_id, wait=False) as acquired:
-        if not acquired:
-            LOGGER.debug(
-                f"queue_dispatch({job_id}) already being ran, "
-                f"not dispatching request {request_type}",
-            )
-            return
-        queue_dispatch_no_lock(
-            process_parent_type,
-            process_parent_id,
-            request_type,
-            request_id,
-            job_id,
-        )
-
-
-def queue_dispatch_no_lock(
     process_parent_type: ProcessParentType,
     process_parent_id: int,
     request_type: Optional[ActivationRequest],
     request_id: str = "",
-    job_id: str = "",
 ):
     """Dispatch the request to the right queue.
 
@@ -213,6 +189,8 @@ def queue_dispatch_no_lock(
     checks the health of the queue before dispatching the request.
     Handles workers offline and unhealthy queues.
     """
+    job_id = _manage_process_job_id(process_parent_type, process_parent_id)
+
     # TODO: add "monitor" type to ActivationRequestQueue
     if request_type is None:
         request_type = "Monitor"
@@ -231,6 +209,14 @@ def queue_dispatch_no_lock(
     assign_request_id(request_id)
     assign_log_tracking_id(process_parent.log_tracking_id)
 
+    with advisory_lock(job_id, wait=False) as acquired:
+        if not acquired:
+            LOGGER.debug(
+                f"_manage({job_id}) already being ran, "
+                f"not dispatching request {request_type}",
+            )
+            return
+
     LOGGER.info(
         f"Dispatching request {request_type} for {process_parent_type} "
         f"{process_parent_id}",
@@ -563,10 +549,7 @@ def monitor_rulebook_processes_no_lock() -> None:
     """
     # run pending user requests
     for request in requests_queue.list_requests():
-        tasking.unique_enqueue(
-            "default",
-            "queue_dispatch_" + str(request.process_parent_id),
-            queue_dispatch,
+        queue_dispatch(
             request.process_parent_type,
             request.process_parent_id,
             request.request,
@@ -581,12 +564,12 @@ def monitor_rulebook_processes_no_lock() -> None:
             ActivationStatus.WORKERS_OFFLINE,
         ]
     ):
-        tasking.unique_enqueue(
-            "default",
-            "queue_dispatch_" + str(process.activation_id),
-            queue_dispatch,
-            str(process.parent_type),
-            process.activation_id,
+        process_parent_type = str(process.parent_type)
+        process_parent_id = process.activation_id
+
+        queue_dispatch(
+            process_parent_type,
+            process_parent_id,
             None,
             str(uuid.uuid4()),
         )
diff --git a/tests/integration/tasks/test_orchestrator.py b/tests/integration/tasks/test_orchestrator.py
@@ -13,8 +13,6 @@
 #  limitations under the License.
 
 import logging
-import time
-from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
 from unittest import mock
 
@@ -204,24 +202,26 @@ def test_monitor_rulebook_processes(
     get_queue_name_mock.return_value = "activation"
     call_args = [
         mock.call(
-            "default",
-            "queue_dispatch_" + str(activation.id),
-            orchestrator.queue_dispatch,
+            "activation",
+            orchestrator._manage_process_job_id(
+                ProcessParentType.ACTIVATION, activation.id
+            ),
+            orchestrator._manage,
             ProcessParentType.ACTIVATION,
             activation.id,
-            ActivationRequest.START,
             "",
         )
     ]
     for running in bulk_running_processes:
         call_args.append(
             mock.call(
-                "default",
-                "queue_dispatch_" + str(running.activation.id),
-                orchestrator.queue_dispatch,
+                "activation",
+                orchestrator._manage_process_job_id(
+                    ProcessParentType.ACTIVATION, running.activation.id
+                ),
+                orchestrator._manage,
                 ProcessParentType.ACTIVATION,
                 running.activation.id,
-                ActivationRequest.START,
                 "",
             )
         )
@@ -237,21 +237,6 @@ def test_monitor_rulebook_processes(
         )
     orchestrator.monitor_rulebook_processes()
 
-    # Also expect calls for running processes
-    # (these will have None as request type)
-    for running in bulk_running_processes:
-        call_args.append(
-            mock.call(
-                "default",
-                "queue_dispatch_" + str(running.activation.id),
-                orchestrator.queue_dispatch,
-                str(running.parent_type),
-                running.activation.id,
-                None,
-                mock.ANY,  # UUID string
-            )
-        )
-
     enqueue_mock.assert_has_calls(call_args, any_order=True)
 
 
@@ -283,45 +268,6 @@ def advisory_lock_mock(*args, **kwargs):
         )
 
         enqueue_mock.assert_not_called()
-        assert (
-            f"queue_dispatch({job_id}) already being ran, " in eda_caplog.text
-        )
+        assert f"_manage({job_id}) already being ran, " in eda_caplog.text
         activation.refresh_from_db()
         assert activation.status == ActivationStatus.STOPPED
-
-
-@pytest.mark.django_db
-def test_queue_dispatch_advisory_lock(activation, eda_caplog):
-    """Test that queue_dispatch advisory lock prevents duplicate execution."""
-    execution_count = 0
-
-    def mock_queue_dispatch_no_lock(*args, **kwargs):
-        nonlocal execution_count
-        execution_count += 1
-        time.sleep(1.0)
-        return True
-
-    def concurrent_dispatch():
-        """Function to run queue_dispatch concurrently."""
-        with mock.patch(
-            "aap_eda.tasks.orchestrator.queue_dispatch_no_lock",
-            side_effect=mock_queue_dispatch_no_lock,
-        ):
-            orchestrator.queue_dispatch(
-                ProcessParentType.ACTIVATION,
-                activation.id,
-                ActivationRequest.START,
-                "test-request",
-            )
-
-    with ThreadPoolExecutor(max_workers=3) as executor:
-        futures = [executor.submit(concurrent_dispatch) for _ in range(3)]
-        for future in futures:
-            future.result()
-
-    assert execution_count == 1, f"Expected 1 execution, got {execution_count}"
-
-    assert (
-        "queue_dispatch" in eda_caplog.text
-        and "already being ran" in eda_caplog.text
-    )