Skip to content

Commit 90e4511

Browse files
wjwwood, Janosch Machowinski, mjcarroll, Janosch Machowinski
authored
[wjwwood] Updated "Data race fixes" (#2500)
* Fix callback group logic in executor Signed-off-by: Janosch Machowinski <[email protected]> * fix: Fixed unnecessary copy of wait_set Signed-off-by: Janosch Machowinski <[email protected]> * fix(executor): Fixed race conditions with rebuild of wait_sets Before this change, the rebuild of the wait set would be triggered after the wait set was woken up. With bad timing, this could lead to the rebuild not happening with a multi-threaded executor. Signed-off-by: Janosch Machowinski <[email protected]> * fix(Executor): Fixed loss of entities rebuild request Signed-off-by: Janosch Machowinski <[email protected]> * chore: Added assert for not set callback_group in execute_any_executable Signed-off-by: Janosch Machowinski <[email protected]> * Add test for cbg getting reset Signed-off-by: Michael Carroll <[email protected]> Co-authored-by: Janosch Machowinski <[email protected]> * chore: renamed test cases to snake_case Signed-off-by: Janosch Machowinski <[email protected]> * style Signed-off-by: William Woodall <[email protected]> * fixup test to avoid polling and short timeouts Signed-off-by: William Woodall <[email protected]> * fix: Use correct notify_waitable_ instance Signed-off-by: Janosch Machowinski <[email protected]> * fix(StaticSingleThreadedExecutor): Added missing special case handling for current_notify_waitable_ Signed-off-by: Janosch Machowinski <[email protected]> * fix(TestCallbackGroup): Fixed test after change to timers Signed-off-by: Janosch Machowinski <[email protected]> --------- Signed-off-by: Janosch Machowinski <[email protected]> Signed-off-by: Janosch Machowinski <[email protected]> Signed-off-by: Michael Carroll <[email protected]> Signed-off-by: William Woodall <[email protected]> Co-authored-by: Janosch Machowinski <[email protected]> Co-authored-by: Michael Carroll <[email protected]> Co-authored-by: Janosch Machowinski <[email protected]>
1 parent dec22a2 commit 90e4511

File tree

7 files changed

+247
-73
lines changed

7 files changed

+247
-73
lines changed

rclcpp/include/rclcpp/executor.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,15 @@ class Executor
550550
AnyExecutable & any_executable,
551551
std::chrono::nanoseconds timeout = std::chrono::nanoseconds(-1));
552552

553+
/// This function triggers a recollect of all entities that are registered to the executor.
554+
/**
555+
* Calling this function is thread safe.
556+
*
557+
* \param[in] notify if true will execute a trigger that will wake up a waiting executor
558+
*/
559+
void
560+
trigger_entity_recollect(bool notify);
561+
553562
/// Spinning state, used to prevent multi threaded calls to spin and to cancel blocking spins.
554563
std::atomic_bool spinning;
555564

rclcpp/include/rclcpp/wait_result.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ class WaitResult final
274274

275275
if (this->kind() == WaitResultKind::Ready) {
276276
auto & wait_set = this->get_wait_set();
277-
auto rcl_wait_set = wait_set.get_rcl_wait_set();
277+
auto & rcl_wait_set = wait_set.get_rcl_wait_set();
278278
while (next_waitable_index_ < wait_set.size_of_waitables()) {
279279
auto cur_waitable = wait_set.waitables(next_waitable_index_++);
280280
if (cur_waitable != nullptr && cur_waitable->is_ready(rcl_wait_set)) {

rclcpp/src/rclcpp/executor.cpp

Lines changed: 59 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
#include <algorithm>
16+
#include <cassert>
1617
#include <chrono>
1718
#include <iterator>
1819
#include <memory>
@@ -72,13 +73,10 @@ Executor::Executor(const rclcpp::ExecutorOptions & options)
7273
}
7374
});
7475

75-
notify_waitable_->set_on_ready_callback(
76-
[this](auto, auto) {
77-
this->entities_need_rebuild_.store(true);
78-
});
79-
8076
notify_waitable_->add_guard_condition(interrupt_guard_condition_);
8177
notify_waitable_->add_guard_condition(shutdown_guard_condition_);
78+
79+
wait_set_.add_waitable(notify_waitable_);
8280
}
8381

8482
Executor::~Executor()
@@ -122,6 +120,20 @@ Executor::~Executor()
122120
}
123121
}
124122

123+
void Executor::trigger_entity_recollect(bool notify)
124+
{
125+
this->entities_need_rebuild_.store(true);
126+
127+
if (!spinning.load() && entities_need_rebuild_.exchange(false)) {
128+
std::lock_guard<std::mutex> guard(mutex_);
129+
this->collect_entities();
130+
}
131+
132+
if (notify) {
133+
interrupt_guard_condition_->trigger();
134+
}
135+
}
136+
125137
std::vector<rclcpp::CallbackGroup::WeakPtr>
126138
Executor::get_all_callback_groups()
127139
{
@@ -152,19 +164,12 @@ Executor::add_callback_group(
152164
(void) node_ptr;
153165
this->collector_.add_callback_group(group_ptr);
154166

155-
if (!spinning.load()) {
156-
std::lock_guard<std::mutex> guard(mutex_);
157-
this->collect_entities();
158-
}
159-
160-
if (notify) {
161-
try {
162-
interrupt_guard_condition_->trigger();
163-
} catch (const rclcpp::exceptions::RCLError & ex) {
164-
throw std::runtime_error(
165-
std::string(
166-
"Failed to trigger guard condition on callback group add: ") + ex.what());
167-
}
167+
try {
168+
this->trigger_entity_recollect(notify);
169+
} catch (const rclcpp::exceptions::RCLError & ex) {
170+
throw std::runtime_error(
171+
std::string(
172+
"Failed to trigger guard condition on callback group add: ") + ex.what());
168173
}
169174
}
170175

@@ -173,19 +178,12 @@ Executor::add_node(rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_pt
173178
{
174179
this->collector_.add_node(node_ptr);
175180

176-
if (!spinning.load()) {
177-
std::lock_guard<std::mutex> guard(mutex_);
178-
this->collect_entities();
179-
}
180-
181-
if (notify) {
182-
try {
183-
interrupt_guard_condition_->trigger();
184-
} catch (const rclcpp::exceptions::RCLError & ex) {
185-
throw std::runtime_error(
186-
std::string(
187-
"Failed to trigger guard condition on node add: ") + ex.what());
188-
}
181+
try {
182+
this->trigger_entity_recollect(notify);
183+
} catch (const rclcpp::exceptions::RCLError & ex) {
184+
throw std::runtime_error(
185+
std::string(
186+
"Failed to trigger guard condition on node add: ") + ex.what());
189187
}
190188
}
191189

@@ -196,18 +194,12 @@ Executor::remove_callback_group(
196194
{
197195
this->collector_.remove_callback_group(group_ptr);
198196

199-
if (!spinning.load()) {
200-
std::lock_guard<std::mutex> guard(mutex_);
201-
this->collect_entities();
202-
}
203-
if (notify) {
204-
try {
205-
interrupt_guard_condition_->trigger();
206-
} catch (const rclcpp::exceptions::RCLError & ex) {
207-
throw std::runtime_error(
208-
std::string(
209-
"Failed to trigger guard condition on callback group remove: ") + ex.what());
210-
}
197+
try {
198+
this->trigger_entity_recollect(notify);
199+
} catch (const rclcpp::exceptions::RCLError & ex) {
200+
throw std::runtime_error(
201+
std::string(
202+
"Failed to trigger guard condition on callback group remove: ") + ex.what());
211203
}
212204
}
213205

@@ -222,19 +214,12 @@ Executor::remove_node(rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node
222214
{
223215
this->collector_.remove_node(node_ptr);
224216

225-
if (!spinning.load()) {
226-
std::lock_guard<std::mutex> guard(mutex_);
227-
this->collect_entities();
228-
}
229-
230-
if (notify) {
231-
try {
232-
interrupt_guard_condition_->trigger();
233-
} catch (const rclcpp::exceptions::RCLError & ex) {
234-
throw std::runtime_error(
235-
std::string(
236-
"Failed to trigger guard condition on node remove: ") + ex.what());
237-
}
217+
try {
218+
this->trigger_entity_recollect(notify);
219+
} catch (const rclcpp::exceptions::RCLError & ex) {
220+
throw std::runtime_error(
221+
std::string(
222+
"Failed to trigger guard condition on node remove: ") + ex.what());
238223
}
239224
}
240225

@@ -379,6 +364,10 @@ Executor::execute_any_executable(AnyExecutable & any_exec)
379364
return;
380365
}
381366

367+
assert(
368+
(void("cannot execute an AnyExecutable without a valid callback group"),
369+
any_exec.callback_group));
370+
382371
if (any_exec.timer) {
383372
TRACETOOLS_TRACEPOINT(
384373
rclcpp_executor_execute,
@@ -403,9 +392,7 @@ Executor::execute_any_executable(AnyExecutable & any_exec)
403392
}
404393

405394
// Reset the callback_group, regardless of type
406-
if (any_exec.callback_group) {
407-
any_exec.callback_group->can_be_taken_from().store(true);
408-
}
395+
any_exec.callback_group->can_be_taken_from().store(true);
409396
}
410397

411398
template<typename Taker, typename Handler>
@@ -642,7 +629,6 @@ Executor::collect_entities()
642629
// In the case that an entity already has an expired weak pointer
643630
// before being removed from the waitset, additionally prune the waitset.
644631
this->wait_set_.prune_deleted_entities();
645-
this->entities_need_rebuild_.store(false);
646632
}
647633

648634
void
@@ -655,7 +641,7 @@ Executor::wait_for_work(std::chrono::nanoseconds timeout)
655641

656642
{
657643
std::lock_guard<std::mutex> guard(mutex_);
658-
if (current_collection_.empty() || this->entities_need_rebuild_.load()) {
644+
if (this->entities_need_rebuild_.exchange(false) || current_collection_.empty()) {
659645
this->collect_entities();
660646
}
661647
}
@@ -664,6 +650,13 @@ Executor::wait_for_work(std::chrono::nanoseconds timeout)
664650
RCUTILS_LOG_WARN_NAMED(
665651
"rclcpp",
666652
"empty wait set received in wait(). This should never happen.");
653+
} else {
654+
if (this->wait_result_->kind() == WaitResultKind::Ready && current_notify_waitable_) {
655+
auto & rcl_wait_set = this->wait_result_->get_wait_set().get_rcl_wait_set();
656+
if (current_notify_waitable_->is_ready(rcl_wait_set)) {
657+
current_notify_waitable_->execute(current_notify_waitable_->take_data());
658+
}
659+
}
667660
}
668661
}
669662

@@ -689,7 +682,7 @@ Executor::get_next_ready_executable(AnyExecutable & any_executable)
689682
auto entity_iter = current_collection_.timers.find(timer->get_timer_handle().get());
690683
if (entity_iter != current_collection_.timers.end()) {
691684
auto callback_group = entity_iter->second.callback_group.lock();
692-
if (callback_group && !callback_group->can_be_taken_from()) {
685+
if (!callback_group || !callback_group->can_be_taken_from()) {
693686
current_timer_index++;
694687
continue;
695688
}
@@ -719,7 +712,7 @@ Executor::get_next_ready_executable(AnyExecutable & any_executable)
719712
subscription->get_subscription_handle().get());
720713
if (entity_iter != current_collection_.subscriptions.end()) {
721714
auto callback_group = entity_iter->second.callback_group.lock();
722-
if (callback_group && !callback_group->can_be_taken_from()) {
715+
if (!callback_group || !callback_group->can_be_taken_from()) {
723716
continue;
724717
}
725718
any_executable.subscription = subscription;
@@ -735,7 +728,7 @@ Executor::get_next_ready_executable(AnyExecutable & any_executable)
735728
auto entity_iter = current_collection_.services.find(service->get_service_handle().get());
736729
if (entity_iter != current_collection_.services.end()) {
737730
auto callback_group = entity_iter->second.callback_group.lock();
738-
if (callback_group && !callback_group->can_be_taken_from()) {
731+
if (!callback_group || !callback_group->can_be_taken_from()) {
739732
continue;
740733
}
741734
any_executable.service = service;
@@ -751,7 +744,7 @@ Executor::get_next_ready_executable(AnyExecutable & any_executable)
751744
auto entity_iter = current_collection_.clients.find(client->get_client_handle().get());
752745
if (entity_iter != current_collection_.clients.end()) {
753746
auto callback_group = entity_iter->second.callback_group.lock();
754-
if (callback_group && !callback_group->can_be_taken_from()) {
747+
if (!callback_group || !callback_group->can_be_taken_from()) {
755748
continue;
756749
}
757750
any_executable.client = client;
@@ -767,7 +760,7 @@ Executor::get_next_ready_executable(AnyExecutable & any_executable)
767760
auto entity_iter = current_collection_.waitables.find(waitable.get());
768761
if (entity_iter != current_collection_.waitables.end()) {
769762
auto callback_group = entity_iter->second.callback_group.lock();
770-
if (callback_group && !callback_group->can_be_taken_from()) {
763+
if (!callback_group || !callback_group->can_be_taken_from()) {
771764
continue;
772765
}
773766
any_executable.waitable = waitable;

rclcpp/src/rclcpp/executors/executor_entities_collection.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ ready_executables(
153153
continue;
154154
}
155155
auto group_info = group_cache(entity_iter->second.callback_group);
156-
if (group_info && !group_info->can_be_taken_from().load()) {
156+
if (!group_info || !group_info->can_be_taken_from().load()) {
157157
continue;
158158
}
159159
if (!entity->call()) {
@@ -176,7 +176,7 @@ ready_executables(
176176
continue;
177177
}
178178
auto group_info = group_cache(entity_iter->second.callback_group);
179-
if (group_info && !group_info->can_be_taken_from().load()) {
179+
if (!group_info || !group_info->can_be_taken_from().load()) {
180180
continue;
181181
}
182182
rclcpp::AnyExecutable exec;
@@ -196,7 +196,7 @@ ready_executables(
196196
continue;
197197
}
198198
auto group_info = group_cache(entity_iter->second.callback_group);
199-
if (group_info && !group_info->can_be_taken_from().load()) {
199+
if (!group_info || !group_info->can_be_taken_from().load()) {
200200
continue;
201201
}
202202
rclcpp::AnyExecutable exec;
@@ -216,7 +216,7 @@ ready_executables(
216216
continue;
217217
}
218218
auto group_info = group_cache(entity_iter->second.callback_group);
219-
if (group_info && !group_info->can_be_taken_from().load()) {
219+
if (!group_info || !group_info->can_be_taken_from().load()) {
220220
continue;
221221
}
222222
rclcpp::AnyExecutable exec;
@@ -236,7 +236,7 @@ ready_executables(
236236
continue;
237237
}
238238
auto group_info = group_cache(entry.callback_group);
239-
if (group_info && !group_info->can_be_taken_from().load()) {
239+
if (!group_info || !group_info->can_be_taken_from().load()) {
240240
continue;
241241
}
242242
rclcpp::AnyExecutable exec;

rclcpp/src/rclcpp/executors/static_single_threaded_executor.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ StaticSingleThreadedExecutor::spin_once_impl(std::chrono::nanoseconds timeout)
110110
std::optional<rclcpp::WaitResult<rclcpp::WaitSet>>
111111
StaticSingleThreadedExecutor::collect_and_wait(std::chrono::nanoseconds timeout)
112112
{
113-
if (current_collection_.empty() || this->entities_need_rebuild_.load()) {
113+
if (this->entities_need_rebuild_.exchange(false) || current_collection_.empty()) {
114114
this->collect_entities();
115115
}
116116
auto wait_result = wait_set_.wait(std::chrono::nanoseconds(timeout));
@@ -119,6 +119,13 @@ StaticSingleThreadedExecutor::collect_and_wait(std::chrono::nanoseconds timeout)
119119
"rclcpp",
120120
"empty wait set received in wait(). This should never happen.");
121121
return {};
122+
} else {
123+
if (wait_result.kind() == WaitResultKind::Ready && current_notify_waitable_) {
124+
auto & rcl_wait_set = wait_result.get_wait_set().get_rcl_wait_set();
125+
if (current_notify_waitable_->is_ready(rcl_wait_set)) {
126+
current_notify_waitable_->execute(current_notify_waitable_->take_data());
127+
}
128+
}
122129
}
123130
return wait_result;
124131
}

rclcpp/test/rclcpp/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,15 @@ if(TARGET test_executors)
473473
target_link_libraries(test_executors_timer_cancel_behavior ${PROJECT_NAME} ${rosgraph_msgs_TARGETS})
474474
endif()
475475

476+
ament_add_gtest(
477+
test_executors_callback_group_behavior
478+
executors/test_executors_callback_group_behavior.cpp
479+
APPEND_LIBRARY_DIRS "${append_library_dirs}"
480+
TIMEOUT 180)
481+
if(TARGET test_executors)
482+
target_link_libraries(test_executors_callback_group_behavior ${PROJECT_NAME})
483+
endif()
484+
476485
ament_add_gtest(
477486
test_executors_intraprocess
478487
executors/test_executors_intraprocess.cpp

0 commit comments

Comments
 (0)