@@ -15,6 +15,9 @@ class ExecutionCancelledError(Exception):
     pass
 
 
+DEFAULT_GLOBAL_EXECUTION_TIMEOUT = 3600  # 1 hour default
+
+
 class AsyncExecutor(BaseExecutor):
     def __init__(
         self,
@@ -25,6 +28,10 @@ def __init__(
         self.broadcast_fn = broadcast_fn
         self._current_execution_id: str | None = None
         self._cancel_event: asyncio.Event = asyncio.Event()
+        # Global execution timeout (prevents runaway executions)
+        self.global_timeout = (config or {}).get(
+            "global_timeout", DEFAULT_GLOBAL_EXECUTION_TIMEOUT
+        )
 
     def cancel(self) -> bool:
         if self._current_execution_id:
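
A quick note on the config lookup above (not part of the PR diff itself): (config or {}).get(...) means both a None config and a missing "global_timeout" key fall back to the one-hour module default. A standalone sketch of that fallback, using a hypothetical resolve_global_timeout helper purely for illustration:

DEFAULT_GLOBAL_EXECUTION_TIMEOUT = 3600  # mirrors the module-level default added above


def resolve_global_timeout(config: dict | None) -> int:
    # A None config and a missing key both fall back to the default.
    return (config or {}).get("global_timeout", DEFAULT_GLOBAL_EXECUTION_TIMEOUT)


assert resolve_global_timeout(None) == 3600
assert resolve_global_timeout({}) == 3600
assert resolve_global_timeout({"global_timeout": 600}) == 600
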
@@ -228,145 +235,41 @@ async def execute(
         self._cancel_event.clear()
 
         logger.info(
-            "[%s] === EXECUTION START === nodes=%d, edges=%d",
+            "[%s] === EXECUTION START === nodes=%d, edges=%d, global_timeout=%ds",
             execution_id[:8],
             len(nodes),
             len(edges),
+            self.global_timeout,
         )
 
         self._report_execution_start(execution_id, len(nodes), len(edges))
 
         try:
-            (
-                node_instances,
-                _dependencies,
-                connections,
-                execution_layers,
-                feedback_connections,
-            ) = self._build_execution_graph(nodes, edges)
-
-            has_feedback = len(feedback_connections) > 0
-            max_iterations = 20
-
-            logger.info(
-                "[%s] Execution plan: %d layers, %d feedback edges",
-                execution_id[:8],
-                len(execution_layers),
-                len(feedback_connections),
-            )
-
-            self._validate_workflow(node_instances)
-
-            context, node_outputs = self._initialize_execution_state(
-                execution_id
+            # Wrap entire execution in global timeout
+            return await asyncio.wait_for(
+                self._execute_workflow(nodes, edges, execution_id),
+                timeout=self.global_timeout,
             )
-
-            iteration = 0
-            while True:
-                iteration += 1
-                context.round_num = iteration
-                self._check_cancelled()
-
-                if has_feedback:
-                    logger.info(
-                        "[%s] === ITERATION %d START ===",
-                        execution_id[:8],
-                        iteration,
-                    )
-                    await self._broadcast(
-                        {
-                            "type": "iteration_start",
-                            "execution_id": execution_id,
-                            "iteration": iteration,
-                        }
-                    )
-
-                for layer_index, layer in enumerate(execution_layers):
-                    self._check_cancelled()
-
-                    logger.info(
-                        "[%s] Starting layer %d/%d with %d node(s): %s",
-                        execution_id[:8],
-                        layer_index,
-                        len(execution_layers) - 1,
-                        len(layer),
-                        layer,
-                    )
-
-                    self._report_layer_start(
-                        execution_id, layer_index, len(execution_layers), layer
-                    )
-
-                    tasks = []
-                    for node_id in layer:
-                        node = node_instances[node_id]
-                        inputs = self._gather_node_inputs(
-                            node_id, connections, node_outputs
-                        )
-                        for src, tgt, src_h, tgt_h in feedback_connections:
-                            if tgt == node_id and src in node_outputs:
-                                src_outputs = node_outputs[src]
-                                if src_h in src_outputs:
-                                    inputs[tgt_h] = src_outputs[src_h]
-                        task = self._execute_single_node(
-                            node_id, node, inputs, context, execution_id
-                        )
-                        tasks.append((node_id, task))
-
-                    results = await asyncio.gather(
-                        *[task for _, task in tasks], return_exceptions=True
-                    )
-
-                    for (node_id, _), result in zip(
-                        tasks, results, strict=False
-                    ):
-                        if isinstance(result, ExecutionCancelledError):
-                            raise result
-                        if isinstance(result, BaseException):
-                            raise result
-                        node_outputs[node_id] = result
-
-                if not has_feedback:
-                    break
-
-                done = False
-                for node_id, outputs in node_outputs.items():
-                    if outputs.get("done") is True:
-                        done = True
-                        logger.info(
-                            "[%s] Termination signal from node %s",
-                            execution_id[:8],
-                            node_id,
-                        )
-                        break
-
-                if done:
-                    break
-
-                if iteration >= max_iterations:
-                    logger.warning(
-                        "[%s] Max iterations (%d) reached",
-                        execution_id[:8],
-                        max_iterations,
-                    )
-                    break
-
-            context.node_outputs = node_outputs
-
-            logger.info(
-                "[%s] === EXECUTION COMPLETE === iterations=%d, outputs from %d nodes",
+        except TimeoutError:
+            logger.error(
+                "[%s] === GLOBAL TIMEOUT === execution exceeded %ds",
                 execution_id[:8],
-                iteration,
-                len(node_outputs),
+                self.global_timeout,
+            )
+            await self._broadcast(
+                {
+                    "type": "execution_error",
+                    "execution_id": execution_id,
+                    "error": {
+                        "type": "GlobalTimeoutError",
+                        "message": f"Execution timed out after {self.global_timeout}s",
+                    },
+                }
             )
-
-            self._report_execution_complete(execution_id, len(node_outputs))
-
             return {
                 "execution_id": execution_id,
-                "outputs": node_outputs,
+                "error": f"Execution timed out after {self.global_timeout}s",
             }
-
         except ExecutionCancelledError:
             logger.info("[%s] Execution cancelled", execution_id[:8])
             await self._broadcast(
@@ -389,3 +292,136 @@ async def execute(
         finally:
             self._current_execution_id = None
             self._cancel_event.clear()
+
+    async def _execute_workflow(
+        self,
+        nodes: list[dict[str, Any]],
+        edges: list[dict[str, Any]],
+        execution_id: str,
+    ) -> dict[str, Any]:
+        """Core workflow execution logic, extracted for timeout wrapping."""
+        (
+            node_instances,
+            _dependencies,
+            connections,
+            execution_layers,
+            feedback_connections,
+        ) = self._build_execution_graph(nodes, edges)
+
+        has_feedback = len(feedback_connections) > 0
+        max_iterations = 20
+
+        logger.info(
+            "[%s] Execution plan: %d layers, %d feedback edges",
+            execution_id[:8],
+            len(execution_layers),
+            len(feedback_connections),
+        )
+
+        self._validate_workflow(node_instances)
+
+        context, node_outputs = self._initialize_execution_state(execution_id)
+
+        iteration = 0
+        while True:
+            iteration += 1
+            context.round_num = iteration
+            self._check_cancelled()
+
+            if has_feedback:
+                logger.info(
+                    "[%s] === ITERATION %d START ===",
+                    execution_id[:8],
+                    iteration,
+                )
+                await self._broadcast(
+                    {
+                        "type": "iteration_start",
+                        "execution_id": execution_id,
+                        "iteration": iteration,
+                    }
+                )
+
+            for layer_index, layer in enumerate(execution_layers):
+                self._check_cancelled()
+
+                logger.info(
+                    "[%s] Starting layer %d/%d with %d node(s): %s",
+                    execution_id[:8],
+                    layer_index,
+                    len(execution_layers) - 1,
+                    len(layer),
+                    layer,
+                )
+
+                self._report_layer_start(
+                    execution_id, layer_index, len(execution_layers), layer
+                )
+
+                tasks = []
+                for node_id in layer:
+                    node = node_instances[node_id]
+                    inputs = self._gather_node_inputs(
+                        node_id, connections, node_outputs
+                    )
+                    for src, tgt, src_h, tgt_h in feedback_connections:
+                        if tgt == node_id and src in node_outputs:
+                            src_outputs = node_outputs[src]
+                            if src_h in src_outputs:
+                                inputs[tgt_h] = src_outputs[src_h]
+                    task = self._execute_single_node(
+                        node_id, node, inputs, context, execution_id
+                    )
+                    tasks.append((node_id, task))
+
+                results = await asyncio.gather(
+                    *[task for _, task in tasks], return_exceptions=True
+                )
+
+                for (node_id, _), result in zip(tasks, results, strict=False):
+                    if isinstance(result, ExecutionCancelledError):
+                        raise result
+                    if isinstance(result, BaseException):
+                        raise result
+                    node_outputs[node_id] = result
+
+            if not has_feedback:
+                break
+
+            done = False
+            for node_id, outputs in node_outputs.items():
+                if outputs.get("done") is True:
+                    done = True
+                    logger.info(
+                        "[%s] Termination signal from node %s",
+                        execution_id[:8],
+                        node_id,
+                    )
+                    break
+
+            if done:
+                break
+
+            if iteration >= max_iterations:
+                logger.warning(
+                    "[%s] Max iterations (%d) reached",
+                    execution_id[:8],
+                    max_iterations,
+                )
+                break
+
+        context.node_outputs = node_outputs
+
+        logger.info(
+            "[%s] === EXECUTION COMPLETE === iterations=%d, outputs from %d nodes",
+            execution_id[:8],
+            iteration,
+            len(node_outputs),
+        )
+
+        self._report_execution_complete(execution_id, len(node_outputs))
+
+        return {
+            "execution_id": execution_id,
+            "outputs": node_outputs,
+        }
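
Context note on the timeout wrapper in execute() above (not part of the PR itself): asyncio.wait_for cancels the wrapped coroutine once the deadline passes, waits for that cancellation to complete, and then raises TimeoutError, which is what the new except TimeoutError branch relies on. Catching the builtin TimeoutError assumes Python 3.11+, where asyncio.TimeoutError is an alias of it. A minimal standalone sketch of that behaviour, with a hypothetical slow_workflow coroutine standing in for _execute_workflow and an artificially small timeout:

import asyncio


async def slow_workflow() -> dict:
    # Stand-in for a long-running _execute_workflow call.
    await asyncio.sleep(10)
    return {"outputs": {}}


async def main() -> None:
    try:
        await asyncio.wait_for(slow_workflow(), timeout=0.1)
    except TimeoutError:
        # By the time this runs, wait_for has already cancelled slow_workflow(),
        # so no orphaned work keeps running past the deadline.
        print("global timeout hit")


asyncio.run(main())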