Skip to content

Commit b2719fd

Browse files
committed
Add Recovery composite to retry a failed main task after attempting one or more recovery behaviors
1 parent ea6c337 commit b2719fd

File tree

3 files changed

+201
-0
lines changed

3 files changed

+201
-0
lines changed

docs/dot/recovery.dot

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
digraph recovery {
2+
graph [fontname="times-roman"];
3+
node [fontname="times-roman"];
4+
edge [fontname="times-roman"];
5+
6+
recovery [label="Recovery", shape=box, style=rounded, fontsize=11];
7+
8+
main [label="Main Behaviour", shape=ellipse, fontsize=10];
9+
rec1 [label="Recovery #1", shape=ellipse, fontsize=10];
10+
rec2 [label="Recovery #2", shape=ellipse, fontsize=10];
11+
recN [label="Recovery #N", shape=ellipse, fontsize=10];
12+
13+
recovery -> main;
14+
recovery -> rec1;
15+
recovery -> rec2;
16+
recovery -> recN;
17+
18+
{rank=same; rec1 rec2 recN}
19+
}

py_trees/composites.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,3 +808,119 @@ def validate_policy_configuration(self) -> None:
808808
)
809809
self.logger.error(error_message)
810810
raise RuntimeError(error_message)
811+
812+
813+
class Recovery(Composite):
    """
    A Recovery composite that wraps a main behaviour with a sequence of recovery behaviours.

    .. graphviz:: dot/recovery.dot

    Execution model:

    - Tick the main behaviour first.
    - If main returns SUCCESS or RUNNING, propagate that.
    - If main returns FAILURE, attempt the next unused recovery behaviour:

      * If the recovery is RUNNING, propagate RUNNING and resume it on the
        next tick.
      * If the recovery returns SUCCESS, consume it, return RUNNING and
        retry main on the next tick.
      * If the recovery returns FAILURE, consume it and immediately attempt
        the following recovery within the same tick.

    - If all recoveries are exhausted and main still fails, return FAILURE.

    Each recovery is attempted at most once per run. A new run starts (main
    first, all recoveries available again) whenever the composite is ticked
    while it is not RUNNING, or after it has been stopped.

    Args:
        name (:obj:`str`): the composite behaviour name
        children ([:class:`~py_trees.behaviour.Behaviour`]): list of children,
            where the first is the main behaviour and the rest are recovery behaviours

    Raises:
        ValueError: if no children are provided (a main behaviour is mandatory)
    """

    def __init__(
        self,
        name: str,
        children: typing.Optional[typing.Sequence[behaviour.Behaviour]] = None,
    ):
        # Validate before handing the children to the base class so that a
        # rejected construction has no side effects.
        if not children:
            raise ValueError("Recovery requires at least a main behaviour")
        super().__init__(name, children)

        # Explicit references
        self.main: behaviour.Behaviour = children[0]
        # Slicing past the end yields an empty sequence, no length check needed.
        self.recoveries: typing.List[behaviour.Behaviour] = list(children[1:])
        # Index of the next recovery that has not been consumed in this run.
        self.current_recovery_index: int = 0
        # True when the next tick should (re)try main, False while recovering.
        self.running_main: bool = True

    def initialise(self) -> None:
        """Reset to the initial state: run main behaviour and restart recovery behaviours sequence."""
        self.current_recovery_index = 0
        self.running_main = True

    def tick(self) -> typing.Iterator[behaviour.Behaviour]:
        """
        Tick over the children.

        Yields:
            :class:`~py_trees.behaviour.Behaviour`: a reference to itself or one of its children
        """
        self.logger.debug("%s.tick()" % self.__class__.__name__)

        if not self.children:
            self.stop(common.Status.FAILURE)
            yield self
            return

        # A tick that arrives while we are not RUNNING starts a new run.
        # Without this reset, the bookkeeping of a previously finished run
        # (exhausted recovery index, running_main == False) would leak into
        # this one and main would never be ticked again.
        if self.status != common.Status.RUNNING:
            self.initialise()

        # First try the main behaviour if we are not in the middle of a recovery
        if self.running_main:
            for node in self.main.tick():
                yield node
            if self.main.status in (common.Status.SUCCESS, common.Status.RUNNING):
                self.status = self.main.status
                yield self
                return
            # main failed, proceed to the next recovery
            self.running_main = False

        # Try recoveries
        while self.current_recovery_index < len(self.recoveries):
            recovery = self.recoveries[self.current_recovery_index]
            for node in recovery.tick():
                yield node

            if recovery.status == common.Status.RUNNING:
                # keep the index where it is so this recovery is resumed
                self.status = common.Status.RUNNING
                yield self
                return

            # The recovery has completed: remember the outcome, then consume it.
            recovered = recovery.status == common.Status.SUCCESS
            recovery.stop(common.Status.INVALID)
            self.current_recovery_index += 1

            if recovered:
                # Reset main so that it starts afresh when retried next tick.
                self.main.stop(common.Status.INVALID)
                self.running_main = True
                self.status = common.Status.RUNNING
                yield self
                return
            # The recovery failed: fall through to the next one in this same
            # tick. The composite is deliberately not yielded here, its status
            # has not been decided yet.

        # No recoveries left → fail
        self.status = common.Status.FAILURE
        yield self

    def stop(self, new_status: common.Status = common.Status.INVALID) -> None:
        """
        Ensure that children are appropriately stopped and update status.

        Args:
            new_status : the composite is transitioning to this new status
        """
        for child in self.children:
            if child.status != common.Status.INVALID:
                child.stop(common.Status.INVALID)
        # Reset both pieces of run state together so that a composite stopped
        # in the middle of a recovery starts again from main.
        self.current_recovery_index = 0
        self.running_main = True
        super().stop(new_status)

tests/test_recovery.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# tests/test_recovery.py
2+
3+
from py_trees.behaviours import StatusQueue
4+
from py_trees.common import Status
5+
from py_trees.composites import Recovery
6+
7+
8+
def test_main_success() -> None:
    """A succeeding main behaviour makes the composite succeed on the first tick."""
    main = StatusQueue("Main", [Status.SUCCESS], eventually=None)
    root = Recovery("Recovery", children=[main])

    root.tick_once()
    assert main.status == Status.SUCCESS
    # The composite itself must report the result, not just its child.
    assert root.status == Status.SUCCESS
14+
15+
16+
def test_main_running() -> None:
    """The composite mirrors main while it is RUNNING and when it then succeeds."""
    main = StatusQueue("Main", [Status.RUNNING, Status.SUCCESS], eventually=None)
    root = Recovery("Recovery", children=[main])

    # One tick per queued status of main; the composite should track each.
    for expected in (Status.RUNNING, Status.SUCCESS):
        root.tick_once()
        assert root.status == expected
25+
26+
27+
def test_recovery_success_then_retry_main() -> None:
    """Main fails once, the single recovery succeeds, and the retried main succeeds."""
    main = StatusQueue("Main", [Status.FAILURE, Status.SUCCESS], eventually=None)
    rec1 = StatusQueue("Rec1", [Status.SUCCESS], eventually=None)
    root = Recovery("Recovery", children=[main, rec1])

    # tick 1: main fails and rec1 succeeds -> composite RUNNING
    # tick 2: main is retried and succeeds -> composite SUCCESS
    for expected in (Status.RUNNING, Status.SUCCESS):
        root.tick_once()
        assert root.status == expected
40+
41+
42+
def test_recovery_fails_then_next_succeeds() -> None:
    """A failed recovery is skipped in favour of the next one before main is retried."""
    main = StatusQueue("Main", [Status.FAILURE, Status.SUCCESS], eventually=None)
    rec1 = StatusQueue("Rec1", [Status.FAILURE], eventually=None)
    rec2 = StatusQueue("Rec2", [Status.SUCCESS], eventually=None)
    root = Recovery("Recovery", children=[main, rec1, rec2])

    # tick 1: main fails, rec1 fails, rec2 is attempted in the same tick and
    #         succeeds -> composite RUNNING
    # tick 2: main is retried and succeeds -> composite SUCCESS
    for expected in (Status.RUNNING, Status.SUCCESS):
        root.tick_once()
        assert root.status == expected
56+
57+
58+
def test_all_recoveries_fail() -> None:
    """When main and every recovery fail, the composite fails within a single tick."""
    # The first entry is the main behaviour, the remaining ones are recoveries.
    failing = [
        StatusQueue(label, [Status.FAILURE], eventually=None)
        for label in ("Main", "Rec1", "Rec2")
    ]
    root = Recovery("Recovery", children=failing)

    root.tick_once()
    assert root.status == Status.FAILURE

0 commit comments

Comments
 (0)