-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Parallel configure/activate in lifecycle manager #5541
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
#include "nav2_lifecycle_manager/lifecycle_manager.hpp" | ||
|
||
#include <chrono> | ||
#include <future> | ||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
@@ -45,6 +46,7 @@ LifecycleManager::LifecycleManager(const rclcpp::NodeOptions & options) | |
declare_parameter("service_timeout", 5.0); | ||
declare_parameter("bond_respawn_max_duration", 10.0); | ||
declare_parameter("attempt_respawn_reconnection", true); | ||
declare_parameter("parallel_state_transitions", rclcpp::ParameterValue(true)); | ||
|
||
registerRclPreshutdownCallback(); | ||
|
||
|
@@ -65,6 +67,7 @@ LifecycleManager::LifecycleManager(const rclcpp::NodeOptions & options) | |
bond_respawn_max_duration_ = rclcpp::Duration::from_seconds(respawn_timeout_s); | ||
|
||
get_parameter("attempt_respawn_reconnection", attempt_respawn_reconnection_); | ||
get_parameter("parallel_state_transitions", parallel_state_transitions_); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. docs.nav2.org needs to be updated with the configuration guide for the new parameter. Also a migration guide entry talking about this feature and some metrics would be nice so other users are aware. |
||
|
||
callback_group_ = create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive, false); | ||
|
||
|
@@ -281,34 +284,104 @@ LifecycleManager::changeStateForNode(const std::string & node_name, std::uint8_t | |
bool | ||
LifecycleManager::changeStateForAllNodes(std::uint8_t transition, bool hard_change) | ||
{ | ||
// Hard change will continue even if a node fails | ||
if (transition == Transition::TRANSITION_CONFIGURE || | ||
transition == Transition::TRANSITION_ACTIVATE) | ||
{ | ||
for (auto & node_name : node_names_) { | ||
try { | ||
if (!changeStateForNode(node_name, transition) && !hard_change) { | ||
return false; | ||
/* Function partially created using claude */ | ||
size_t active_nodes_count = 0; | ||
std::string nodes_in_error_state = ""; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unused. |
||
std::string unconfigured_nodes = ""; | ||
std::string inactive_nodes = ""; | ||
std::string delimiter(", "); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This shouldn't be necessary, we have the information in the code necessary to check state returns |
||
|
||
if (parallel_state_transitions_) { | ||
// Parallel execution | ||
std::vector<std::future<bool>> futures; | ||
std::vector<std::string> processing_nodes; | ||
|
||
// Hard change will continue even if a node fails | ||
if (transition == Transition::TRANSITION_CONFIGURE || | ||
transition == Transition::TRANSITION_ACTIVATE) | ||
{ | ||
// Launch all state changes in parallel | ||
for (auto & node_name : node_names_) { | ||
futures.emplace_back(std::async(std::launch::async, [this, node_name, transition]() { | ||
try { | ||
return changeStateForNode(node_name, transition); | ||
} catch (const std::runtime_error & e) { | ||
RCLCPP_ERROR( | ||
get_logger(), | ||
"Failed to change state for node: %s. Exception: %s.", node_name.c_str(), e.what()); | ||
return false; | ||
} | ||
})); | ||
processing_nodes.push_back(node_name); | ||
} | ||
} else { | ||
// For deactivation/cleanup, process in reverse order but still in parallel | ||
std::vector<std::string> reverse_nodes(node_names_.rbegin(), node_names_.rend()); | ||
for (auto & node_name : reverse_nodes) { | ||
futures.emplace_back(std::async(std::launch::async, [this, node_name, transition]() { | ||
try { | ||
return changeStateForNode(node_name, transition); | ||
} catch (const std::runtime_error & e) { | ||
RCLCPP_ERROR( | ||
get_logger(), | ||
"Failed to change state for node: %s. Exception: %s.", node_name.c_str(), e.what()); | ||
return false; | ||
} | ||
})); | ||
processing_nodes.push_back(node_name); | ||
} | ||
} | ||
|
||
// Wait for all futures and collect results | ||
for (size_t i = 0; i < futures.size(); ++i) { | ||
bool success = futures[i].get(); | ||
const std::string & node_name = processing_nodes[i]; | ||
|
||
if (!success && !hard_change) { | ||
uint8_t state = node_map_[node_name]->get_state(); | ||
if (!strcmp(reinterpret_cast<char *>(&state), "Inactive")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have the |
||
inactive_nodes += node_name + delimiter; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unused. |
||
} else { | ||
unconfigured_nodes += node_name + delimiter; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unused. |
||
} | ||
} catch (const std::runtime_error & e) { | ||
RCLCPP_ERROR( | ||
get_logger(), | ||
"Failed to change state for node: %s. Exception: %s.", node_name.c_str(), e.what()); | ||
return false; | ||
} else if (success) { | ||
++active_nodes_count; | ||
} | ||
} | ||
if (active_nodes_count != node_names_.size()) { | ||
return false; | ||
} | ||
|
||
} else { | ||
std::vector<std::string>::reverse_iterator rit; | ||
for (rit = node_names_.rbegin(); rit != node_names_.rend(); ++rit) { | ||
try { | ||
if (!changeStateForNode(*rit, transition) && !hard_change) { | ||
// Sequential execution (original behavior) | ||
if (transition == Transition::TRANSITION_CONFIGURE || | ||
transition == Transition::TRANSITION_ACTIVATE) | ||
{ | ||
for (auto & node_name : node_names_) { | ||
try { | ||
if (!changeStateForNode(node_name, transition) && !hard_change) { | ||
return false; | ||
} | ||
} catch (const std::runtime_error & e) { | ||
RCLCPP_ERROR( | ||
get_logger(), | ||
"Failed to change state for node: %s. Exception: %s.", node_name.c_str(), e.what()); | ||
return false; | ||
} | ||
} catch (const std::runtime_error & e) { | ||
RCLCPP_ERROR( | ||
} | ||
} else { | ||
std::vector<std::string>::reverse_iterator rit; | ||
for (rit = node_names_.rbegin(); rit != node_names_.rend(); ++rit) { | ||
try { | ||
if (!changeStateForNode(*rit, transition) && !hard_change) { | ||
return false; | ||
} | ||
} catch (const std::runtime_error & e) { | ||
RCLCPP_ERROR( | ||
get_logger(), | ||
"Failed to change state for node: %s. Exception: %s.", (*rit).c_str(), e.what()); | ||
return false; | ||
return false; | ||
} | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We would want this default off since Nav2 servers do indeed depend on each other.