Skip to content

Commit 9f6177c

Browse files
committed
Add Recovery composite to retry a failed main task after attempting one or more recovery behaviors
1 parent ea6c337 commit 9f6177c

File tree

3 files changed

+199
-0
lines changed

3 files changed

+199
-0
lines changed

docs/dot/recovery.dot

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
digraph recovery {
    // Diagram of the Recovery composite: one main behaviour followed by
    // an arbitrary number of recovery behaviours.

    // Use the same font for the graph, its nodes and its edges.
    graph [fontname="times-roman"];
    node [fontname="times-roman"];
    edge [fontname="times-roman"];

    // The composite itself.
    recovery [label="Recovery", shape=box, style=rounded, fontsize=11];

    // Children share a shape and font size; defaults set here only apply
    // to nodes created below this line.
    node [shape=ellipse, fontsize=10];
    main [label="Main Behaviour"];
    rec1 [label="Recovery #1"];
    rec2 [label="Recovery #2"];
    recN [label="Recovery #N"];

    // Parent -> child edges, in child order.
    recovery -> {main rec1 rec2 recN};

    // Keep the recovery behaviours on one row.
    {rank=same; rec1 rec2 recN}
}

py_trees/composites.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,3 +808,117 @@ def validate_policy_configuration(self) -> None:
808808
)
809809
self.logger.error(error_message)
810810
raise RuntimeError(error_message)
811+
812+
813+
class Recovery(Composite):
    """
    Retry a main behaviour, running recovery behaviours whenever it fails.

    .. graphviz:: dot/recovery.dot

    Execution model:
        - Tick the main behaviour first.
        - If main returns SUCCESS or RUNNING, propagate that.
        - If main returns FAILURE, tick the next unused recovery behaviour:
            * If the recovery is RUNNING, propagate RUNNING.
            * If the recovery returns SUCCESS, consume it, reset main and
              return RUNNING; main is retried on the next tick.
            * If the recovery returns FAILURE, consume it and try the
              following recovery within the same tick.
        - If main has failed and no unused recoveries remain, return FAILURE.

    Each recovery is used at most once per run. A new run starts (and every
    recovery becomes available again) whenever the composite is ticked while
    its status is not RUNNING.

    Args:
        name (:obj:`str`): the composite behaviour name
        children ([:class:`~py_trees.behaviour.Behaviour`]): list of children,
            where the first is the main behaviour and the rest are recovery behaviours

    Raises:
        ValueError: if no children are provided
    """

    def __init__(
        self,
        name: str,
        children: typing.Optional[typing.Sequence[behaviour.Behaviour]] = None,
    ):
        super().__init__(name, children)
        if not children:
            raise ValueError("Recovery requires at least a main behaviour")

        # Explicit references.
        # NOTE: these are snapshots of the constructor argument; children
        # added or removed afterwards are not reflected here.
        self.main: behaviour.Behaviour = children[0]
        self.recoveries: typing.List[behaviour.Behaviour] = list(children[1:])
        # Index of the next recovery that has not yet been consumed this run
        self.current_recovery_index: int = 0
        # True when the next tick should tick main, False while recovering
        self.running_main: bool = True

    def initialise(self) -> None:
        """Reset to the initial state: run main behaviour and restart recovery behaviours sequence."""
        self.current_recovery_index = 0
        self.running_main = True

    def tick(self) -> typing.Iterator[behaviour.Behaviour]:
        """
        Tick over the children.

        Yields:
            :class:`~py_trees.behaviour.Behaviour`: a reference to itself or one of its children
        """
        self.logger.debug("%s.tick()" % self.__class__.__name__)

        # A status other than RUNNING means a new run is starting (first
        # tick, previous run finished, or interrupted): forget consumed
        # recoveries so the run begins with main again.
        if self.status != common.Status.RUNNING:
            self.initialise()

        if not self.children:
            self.stop(common.Status.FAILURE)
            yield self
            return

        # First try the main behaviour if we are not in the middle of a recovery
        if self.running_main:
            for node in self.main.tick():
                yield node
                if node is not self.main:
                    continue
                if node.status in (common.Status.SUCCESS, common.Status.RUNNING):
                    self.status = node.status
                    yield self
                    return
                if node.status == common.Status.FAILURE:
                    # proceed to next recovery
                    self.running_main = False
                    break

        # Try recoveries
        while self.current_recovery_index < len(self.recoveries):
            recovery = self.recoveries[self.current_recovery_index]
            for node in recovery.tick():
                yield node
                if node is not recovery:
                    continue
                # Capture the result now; stopping the recovery below
                # overwrites its status with INVALID.
                outcome = node.status
                if outcome == common.Status.RUNNING:
                    self.status = common.Status.RUNNING
                    yield self
                    return
                if outcome == common.Status.SUCCESS:
                    # consume this recovery and retry main on the next tick
                    recovery.stop(common.Status.INVALID)
                    self.current_recovery_index += 1
                    self.main.stop(common.Status.INVALID)
                    self.running_main = True
                    self.status = common.Status.RUNNING
                    yield self
                    return
                if outcome == common.Status.FAILURE:
                    # Consume this recovery and move to the next one. The
                    # composite is deliberately not yielded here: its status
                    # is not final yet and a parent would act on a stale value.
                    recovery.stop(common.Status.INVALID)
                    self.current_recovery_index += 1
                    break

        # No recoveries left → fail
        self.status = common.Status.FAILURE
        yield self

    def stop(self, new_status: common.Status = common.Status.INVALID) -> None:
        """
        Ensure that children are appropriately stopped and update status.

        Args:
            new_status : the composite is transitioning to this new status
        """
        for child in self.children:
            if child.status != common.Status.INVALID:
                child.stop(common.Status.INVALID)
        # Drop all progress so that an interrupted recovery cannot cause
        # main to be skipped when the composite is next ticked.
        self.current_recovery_index = 0
        self.running_main = True
        super().stop(new_status)

tests/test_recovery.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# tests/test_recovery.py
2+
3+
from py_trees.behaviours import StatusQueue
4+
from py_trees.common import Status
5+
from py_trees.composites import Recovery
6+
7+
8+
def test_main_success() -> None:
    """A succeeding main behaviour makes the composite succeed on the first tick."""
    main = StatusQueue("Main", [Status.SUCCESS], eventually=None)
    root = Recovery("Recovery", children=[main])

    root.tick_once()
    assert main.status == Status.SUCCESS
    # The composite must propagate main's result, not merely tick it
    assert root.status == Status.SUCCESS
14+
15+
16+
def test_main_running() -> None:
    """The composite mirrors main while it is RUNNING and once it succeeds."""
    worker = StatusQueue("Main", [Status.RUNNING, Status.SUCCESS], eventually=None)
    composite = Recovery("Recovery", children=[worker])

    # One tick per expected status: RUNNING first, SUCCESS afterwards
    for expected in (Status.RUNNING, Status.SUCCESS):
        composite.tick_once()
        assert composite.status == expected
25+
26+
27+
def test_recovery_success_then_retry_main() -> None:
    """Main fails once, the single recovery succeeds, the retried main succeeds."""
    worker = StatusQueue("Main", [Status.FAILURE, Status.SUCCESS], eventually=None)
    fallback = StatusQueue("Rec1", [Status.SUCCESS], eventually=None)
    composite = Recovery("Recovery", children=[worker, fallback])

    # First tick: main fails and the recovery succeeds -> composite RUNNING.
    # Second tick: main is retried and succeeds -> composite SUCCESS.
    for expected in (Status.RUNNING, Status.SUCCESS):
        composite.tick_once()
        assert composite.status == expected
40+
41+
42+
def test_recovery_fails_then_next_succeeds() -> None:
    """A failed recovery is skipped in favour of the next one, then main is retried."""
    # main fails, rec1 fails, rec2 succeeds, then main succeeds
    main = StatusQueue("Main", [Status.FAILURE, Status.SUCCESS], eventually=None)
    rec1 = StatusQueue("Rec1", [Status.FAILURE], eventually=None)
    rec2 = StatusQueue("Rec2", [Status.SUCCESS], eventually=None)
    root = Recovery("Recovery", children=[main, rec1, rec2])

    # tick 1: main fails, rec1 fails, rec2 is tried within the same tick and
    # succeeds, composite RUNNING
    root.tick_once()
    assert root.status == Status.RUNNING

    # tick 2: main retried, succeeds
    root.tick_once()
    assert root.status == Status.SUCCESS
    # The composite's success must come from main itself
    assert main.status == Status.SUCCESS
56+
57+
58+
def test_all_recoveries_fail() -> None:
    """The composite fails when main and every recovery behaviour fail."""
    worker = StatusQueue("Main", [Status.FAILURE], eventually=None)
    fallbacks = [
        StatusQueue("Rec1", [Status.FAILURE], eventually=None),
        StatusQueue("Rec2", [Status.FAILURE], eventually=None),
    ]
    composite = Recovery("Recovery", children=[worker, *fallbacks])

    # A single tick walks main and both recoveries, all of which fail
    composite.tick_once()
    assert composite.status == Status.FAILURE

0 commit comments

Comments
 (0)