diff --git a/nav2_util/include/nav2_util/lifecycle_utils.hpp b/nav2_util/include/nav2_util/lifecycle_utils.hpp index 91517753d49..b0b374cec67 100644 --- a/nav2_util/include/nav2_util/lifecycle_utils.hpp +++ b/nav2_util/include/nav2_util/lifecycle_utils.hpp @@ -77,6 +77,33 @@ void reset_lifecycle_nodes( reset_lifecycle_nodes(split(nodes, ':'), service_call_timeout, retries); } +/// Transition the given lifecycle nodes to the ErrorProcessing state in order +/** At this time, service calls frequently hang for unknown reasons. The only + * way to combat that is to timeout the service call and retry it. To use this + * function, estimate how long your nodes should take to at each transition and + * set your timeout accordingly. + * \param[in] node_names A vector of the fully qualified node names to reset. + * \param[in] service_call_timeout The maximum amount of time to wait for a + * service call. + * \param[in] retries The number of times to try a state transition service call + */ +void process_error_lifecycle_nodes( + const std::vector & node_names, + const std::chrono::seconds service_call_timeout = std::chrono::seconds::max(), + const int retries = 3); + +/// Transition the given lifecycle nodes to the ErrorProcessing state in order. +/** + * \param[in] nodes A ':' seperated list of node names. eg. "/node1:/node2" + */ +void process_error_lifecycle_nodes( + const std::string & nodes, + const std::chrono::seconds service_call_timeout = std::chrono::seconds::max(), + const int retries = 3) +{ + process_error_lifecycle_nodes(split(nodes, ':'), service_call_timeout, retries); +} + } // namespace nav2_util #endif // NAV2_UTIL__LIFECYCLE_UTILS_HPP_ diff --git a/nav2_util/src/lifecycle_utils.cpp b/nav2_util/src/lifecycle_utils.cpp index c778e2abdf7..b324fafe3a0 100644 --- a/nav2_util/src/lifecycle_utils.cpp +++ b/nav2_util/src/lifecycle_utils.cpp @@ -23,6 +23,7 @@ using std::string; using lifecycle_msgs::msg::Transition; +using lifecycle_msgs::msg::State; namespace nav2_util { @@ -98,4 +99,58 @@ void reset_lifecycle_nodes( } } +// TODO(gimait): This should be updated with the missing transitions to error after +// PRs ros2/rcl_interfaces#97, ros2/rcl#618 and ros2/rclcpp#1064 are merged. +// This tool should not be used for until then. +static void processErrorLifecycleNode( + const std::string & node_name, + const std::chrono::seconds service_call_timeout, + const int retries) +{ + LifecycleServiceClient sc(node_name); + uint8_t transition; + // Each state of the lifecycle node has a specific transition + // to the ErrorProcessing state. Some states do not support transitions + // to ErrorProcessing. + switch (sc.get_state(service_call_timeout)) { + case State::TRANSITION_STATE_CONFIGURING: + transition = Transition::TRANSITION_ON_CONFIGURE_ERROR; + break; + case State::TRANSITION_STATE_CLEANINGUP: + transition = Transition::TRANSITION_ON_CLEANUP_ERROR; + break; + case State::TRANSITION_STATE_SHUTTINGDOWN: + transition = Transition::TRANSITION_ON_SHUTDOWN_ERROR; + break; + case State::TRANSITION_STATE_ACTIVATING: + transition = Transition::TRANSITION_ON_ACTIVATE_ERROR; + break; + case State::TRANSITION_STATE_DEACTIVATING: + transition = Transition::TRANSITION_ON_DEACTIVATE_ERROR; + break; + case State::TRANSITION_STATE_ERRORPROCESSING: + transition = Transition::TRANSITION_ON_ERROR_ERROR; + break; + default: + throw std::runtime_error("Invalid state."); + } + + // Despite waiting for the service to be available and using reliable transport + // service calls still frequently hang. To get reliable reset it's necessary + // to timeout the service call and retry it when that happens. + RETRY( + sc.change_state(transition, service_call_timeout), + retries); +} + +void process_error_lifecycle_nodes( + const std::vector & node_names, + const std::chrono::seconds service_call_timeout, + const int retries) +{ + for (const auto & node_name : node_names) { + processErrorLifecycleNode(node_name, service_call_timeout, retries); + } +} + } // namespace nav2_util diff --git a/nav2_util/test/test_lifecycle_utils.cpp b/nav2_util/test/test_lifecycle_utils.cpp index f8724ccf1c2..8b4b0e3fe43 100644 --- a/nav2_util/test/test_lifecycle_utils.cpp +++ b/nav2_util/test/test_lifecycle_utils.cpp @@ -22,6 +22,7 @@ using nav2_util::startup_lifecycle_nodes; using nav2_util::reset_lifecycle_nodes; +using nav2_util::process_error_lifecycle_nodes; class RclCppFixture { @@ -58,3 +59,22 @@ TEST(Lifecycle, interface) node_thread.join(); SUCCEED(); } + +TEST(Lifecycle, error_transition) +{ + std::vector nodes; + nodes.push_back(rclcpp_lifecycle::LifecycleNode::make_shared("foo")); + nodes.push_back(rclcpp_lifecycle::LifecycleNode::make_shared("bar")); + + std::atomic done(false); + std::thread node_thread(SpinNodesUntilDone, nodes, &done); + startup_lifecycle_nodes("/foo:/bar"); + + // At the moment, the lifecycle transitions to ErrorProcessing are not exposed + // to client nodes, so process_error_lifecycle_nodes will always return an exception. + EXPECT_THROW(process_error_lifecycle_nodes("/foo:/bar"), std::runtime_error); + + done = true; + node_thread.join(); + SUCCEED(); +}