diff --git a/rclcpp/CMakeLists.txt b/rclcpp/CMakeLists.txt
index 6fd6c4586b..5098089105 100644
--- a/rclcpp/CMakeLists.txt
+++ b/rclcpp/CMakeLists.txt
@@ -63,9 +63,10 @@ set(${PROJECT_NAME}_SRCS
   src/rclcpp/executors/executor_notify_waitable.cpp
   src/rclcpp/executors/multi_threaded_executor.cpp
   src/rclcpp/executors/single_threaded_executor.cpp
-  src/rclcpp/executors/static_executor_entities_collector.cpp
   src/rclcpp/executors/static_single_threaded_executor.cpp
   src/rclcpp/expand_topic_or_service_name.cpp
+  src/rclcpp/experimental/executors/events_executor/events_executor.cpp
+  src/rclcpp/experimental/timers_manager.cpp
   src/rclcpp/future_return_code.cpp
   src/rclcpp/generic_publisher.cpp
   src/rclcpp/generic_subscription.cpp
diff --git a/rclcpp/include/rclcpp/any_executable.hpp b/rclcpp/include/rclcpp/any_executable.hpp
index 5d4064f452..e4e9eaecb0 100644
--- a/rclcpp/include/rclcpp/any_executable.hpp
+++ b/rclcpp/include/rclcpp/any_executable.hpp
@@ -45,9 +45,9 @@ struct AnyExecutable
   rclcpp::ClientBase::SharedPtr client;
   rclcpp::Waitable::SharedPtr waitable;
   // These are used to keep the scope on the containing items
-  rclcpp::CallbackGroup::SharedPtr callback_group;
-  rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_base;
-  std::shared_ptr<void> data;
+  rclcpp::CallbackGroup::SharedPtr callback_group {nullptr};
+  rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_base {nullptr};
+  std::shared_ptr<void> data {nullptr};
 };
 
 }  // namespace rclcpp
diff --git a/rclcpp/include/rclcpp/callback_group.hpp b/rclcpp/include/rclcpp/callback_group.hpp
index 43c7daa888..dcb38d29f6 100644
--- a/rclcpp/include/rclcpp/callback_group.hpp
+++ b/rclcpp/include/rclcpp/callback_group.hpp
@@ -185,18 +185,41 @@ class CallbackGroup
    * \return the number of entities in the callback group.
    */
   RCLCPP_PUBLIC
-  size_t size() const;
+  size_t
+  size() const;
 
+  /// Return a reference to the 'can be taken' atomic boolean.
+  /**
+   * The resulting bool will be true in the case that no executor is currently
+   * using an executable entity from this group.
+   * The resulting bool will be false in the case that an executor is currently
+   * using an executable entity from this group, and the group policy doesn't
+   * allow a second take (e.g. mutual exclusion).
+   * \return a reference to the flag
+   */
   RCLCPP_PUBLIC
   std::atomic_bool &
   can_be_taken_from();
 
+  /// Get the group type.
+  /**
+   * \return the group type
+   */
   RCLCPP_PUBLIC
   const CallbackGroupType &
   type() const;
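// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): one way an executor-like
// consumer could use the `can_be_taken_from` flag documented above. For a
// mutually exclusive group, the flag is flipped to false while one of the
// group's entities executes, then restored. The helper function
// `try_execute_from_group` and the `work` callback are hypothetical.
#include <functional>
#include "rclcpp/callback_group.hpp"

bool try_execute_from_group(
  const rclcpp::CallbackGroup::SharedPtr & group,
  const std::function<void()> & work)
{
  bool expected = true;
  // Atomically claim the group; fails if another thread already holds it.
  if (!group->can_be_taken_from().compare_exchange_strong(expected, false)) {
    return false;
  }
  work();
  group->can_be_taken_from().store(true);  // release the group
  return true;
}
// ---------------------------------------------------------------------------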
+  /// Collect all of the entity pointers contained in this callback group.
+  /**
+   * \param[in] sub_func Function to execute for each subscription
+   * \param[in] service_func Function to execute for each service
+   * \param[in] client_func Function to execute for each client
+   * \param[in] timer_func Function to execute for each timer
+   * \param[in] waitable_func Function to execute for each waitable
+   */
   RCLCPP_PUBLIC
-  void collect_all_ptrs(
+  void
+  collect_all_ptrs(
     std::function<void(const rclcpp::SubscriptionBase::SharedPtr &)> sub_func,
     std::function<void(const rclcpp::ServiceBase::SharedPtr &)> service_func,
     std::function<void(const rclcpp::ClientBase::SharedPtr &)> client_func,
diff --git a/rclcpp/include/rclcpp/executor.hpp b/rclcpp/include/rclcpp/executor.hpp
index 116ed04a3c..0874aca3b3 100644
--- a/rclcpp/include/rclcpp/executor.hpp
+++ b/rclcpp/include/rclcpp/executor.hpp
@@ -29,26 +29,24 @@
 #include "rcl/guard_condition.h"
 #include "rcl/wait.h"
 
+#include "rclcpp/executors/executor_notify_waitable.hpp"
 #include "rcpputils/scope_exit.hpp"
 
 #include "rclcpp/context.hpp"
 #include "rclcpp/contexts/default_context.hpp"
 #include "rclcpp/guard_condition.hpp"
 #include "rclcpp/executor_options.hpp"
+#include "rclcpp/executors/executor_entities_collection.hpp"
+#include "rclcpp/executors/executor_entities_collector.hpp"
 #include "rclcpp/future_return_code.hpp"
-#include "rclcpp/memory_strategies.hpp"
-#include "rclcpp/memory_strategy.hpp"
 #include "rclcpp/node_interfaces/node_base_interface.hpp"
 #include "rclcpp/utilities.hpp"
 #include "rclcpp/visibility_control.hpp"
+#include "rclcpp/wait_set.hpp"
 
 namespace rclcpp
 {
 
-typedef std::map<rclcpp::CallbackGroup::WeakPtr,
-    rclcpp::node_interfaces::NodeBaseInterface::WeakPtr,
-    std::owner_less<rclcpp::CallbackGroup::WeakPtr>> WeakCallbackGroupsToNodesMap;
-
 // Forward declaration is used in convenience method signature.
 class Node;
 class ExecutorImplementation;
@@ -393,17 +391,6 @@ class Executor
   void
   cancel();
 
-  /// Support dynamic switching of the memory strategy.
-  /**
-   * Switching the memory strategy while the executor is spinning in another threading could have
-   * unintended consequences.
-   * \param[in] memory_strategy Shared pointer to the memory strategy to set.
-   * \throws std::runtime_error if memory_strategy is null
-   */
-  RCLCPP_PUBLIC
-  void
-  set_memory_strategy(memory_strategy::MemoryStrategy::SharedPtr memory_strategy);
-
   /// Returns true if the executor is currently spinning.
   /**
    * This function can be called asynchronously from any thread.
@@ -441,7 +428,7 @@ class Executor
   RCLCPP_PUBLIC
   static void
-  execute_timer(rclcpp::TimerBase::SharedPtr timer);
+  execute_timer(rclcpp::TimerBase::SharedPtr timer, const std::shared_ptr<void> & dataPtr);
 
   RCLCPP_PUBLIC
   static void
@@ -451,6 +438,12 @@ class Executor
   static void
   execute_client(rclcpp::ClientBase::SharedPtr client);
 
+  /// Gather all of the waitable entities from associated nodes and callback groups.
+  RCLCPP_PUBLIC
+  void
+  collect_entities();
+
+  /// Block until more work becomes available or timeout is reached.
   /**
    * \throws std::runtime_error if the wait set can be cleared
    */
   RCLCPP_PUBLIC
   void
   wait_for_work(std::chrono::nanoseconds timeout = std::chrono::nanoseconds(-1));
 
-  RCLCPP_PUBLIC
-  rclcpp::node_interfaces::NodeBaseInterface::SharedPtr
-  get_node_by_group(
-    const WeakCallbackGroupsToNodesMap & weak_groups_to_nodes,
-    rclcpp::CallbackGroup::SharedPtr group);
-
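// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): using `collect_all_ptrs` from
// the callback_group.hpp hunk above to tally every entity held by a group.
// A single generic lambda converts to each of the five std::function
// parameter types; the surrounding helper function is hypothetical.
#include <cstddef>
#include "rclcpp/callback_group.hpp"

size_t count_group_entities(rclcpp::CallbackGroup & group)
{
  size_t count = 0;
  auto tally = [&count](const auto & /*entity*/) {++count;};
  group.collect_all_ptrs(tally, tally, tally, tally, tally);
  return count;
}
// ---------------------------------------------------------------------------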
-  /// Return true if the node has been added to this executor.
+  /// Check for executable in ready state and populate union structure.
   /**
-   * \param[in] node_ptr a shared pointer that points to a node base interface
-   * \param[in] weak_groups_to_nodes map to nodes to lookup
-   * \return true if the node is associated with the executor, otherwise false
+   * \param[out] any_executable populated union structure of ready executable
+   * \return true if an executable was ready and any_executable was populated,
+   *   otherwise false
    */
   RCLCPP_PUBLIC
   bool
-  has_node(
-    const rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
-    const WeakCallbackGroupsToNodesMap & weak_groups_to_nodes) const;
-
-  RCLCPP_PUBLIC
-  rclcpp::CallbackGroup::SharedPtr
-  get_group_by_timer(rclcpp::TimerBase::SharedPtr timer);
-
-  /// Add a callback group to an executor
-  /**
-   * \see rclcpp::Executor::add_callback_group
-   */
-  RCLCPP_PUBLIC
-  virtual void
-  add_callback_group_to_map(
-    rclcpp::CallbackGroup::SharedPtr group_ptr,
-    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
-    WeakCallbackGroupsToNodesMap & weak_groups_to_nodes,
-    bool notify = true) RCPPUTILS_TSA_REQUIRES(mutex_);
+  get_next_ready_executable(AnyExecutable & any_executable);
 
-  /// Remove a callback group from the executor.
+  /// Wait for executable in ready state and populate union structure.
   /**
-   * \see rclcpp::Executor::remove_callback_group
+   * If an executable is ready, it will return immediately, otherwise
+   * block based on the timeout for work to become ready.
+   *
+   * \param[out] any_executable populated union structure of ready executable
+   * \param[in] timeout duration of time to wait for work, a negative value
+   *   (the default behavior), will make this function block indefinitely
+   * \return true if an executable was ready and any_executable was populated,
+   *   otherwise false
    */
   RCLCPP_PUBLIC
-  virtual void
-  remove_callback_group_from_map(
-    rclcpp::CallbackGroup::SharedPtr group_ptr,
-    WeakCallbackGroupsToNodesMap & weak_groups_to_nodes,
-    bool notify = true) RCPPUTILS_TSA_REQUIRES(mutex_);
-
-  RCLCPP_PUBLIC
-  bool
-  get_next_ready_executable(AnyExecutable & any_executable);
-
-  RCLCPP_PUBLIC
-  bool
-  get_next_ready_executable_from_map(
-    AnyExecutable & any_executable,
-    const WeakCallbackGroupsToNodesMap & weak_groups_to_nodes);
-
-  RCLCPP_PUBLIC
   bool
   get_next_executable(
     AnyExecutable & any_executable,
     std::chrono::nanoseconds timeout = std::chrono::nanoseconds(-1));
 
-  /// Add all callback groups that can be automatically added from associated nodes.
-  /**
-   * The executor, before collecting entities, verifies if any callback group from
-   * nodes associated with the executor, which is not already associated to an executor,
-   * can be automatically added to this executor.
-   * This takes care of any callback group that has been added to a node but not explicitly added
-   * to the executor.
-   * It is important to note that in order for the callback groups to be automatically added to an
-   * executor through this function, the node of the callback groups needs to have been added
-   * through the `add_node` method.
-   */
-  RCLCPP_PUBLIC
-  virtual void
-  add_callback_groups_from_nodes_associated_to_executor() RCPPUTILS_TSA_REQUIRES(mutex_);
-
   /// Spinning state, used to prevent multi threaded calls to spin and to cancel blocking spins.
   std::atomic_bool spinning;
 
@@ -543,16 +487,8 @@ class Executor
   /// Guard condition for signaling the rmw layer to wake up for system shutdown.
   std::shared_ptr<rclcpp::GuardCondition> shutdown_guard_condition_;
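// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): how the protected
// `get_next_executable` / `execute_any_executable` pair documented above is
// typically combined in a spin loop. This paraphrases the pattern used by
// rclcpp's SingleThreadedExecutor; the class name is hypothetical.
#include <chrono>
#include <stdexcept>
#include "rclcpp/executor.hpp"
#include "rcpputils/scope_exit.hpp"

class MinimalExecutor : public rclcpp::Executor
{
public:
  void spin() override
  {
    if (spinning.exchange(true)) {
      throw std::runtime_error("spin() called while already spinning");
    }
    RCPPUTILS_SCOPE_EXIT(this->spinning.store(false););
    while (rclcpp::ok(this->context_) && spinning.load()) {
      rclcpp::AnyExecutable any_executable;
      // Blocks in wait_for_work() until an entity is ready, then runs it.
      if (get_next_executable(any_executable, std::chrono::nanoseconds(-1))) {
        execute_any_executable(any_executable);
      }
    }
  }
};
// ---------------------------------------------------------------------------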
-  /// Wait set for managing entities that the rmw layer waits on.
-  rcl_wait_set_t wait_set_ = rcl_get_zero_initialized_wait_set();
-
-  // Mutex to protect the subsequent memory_strategy_.
   mutable std::mutex mutex_;
 
-  /// The memory strategy: an interface for handling user-defined memory allocation strategies.
-  memory_strategy::MemoryStrategy::SharedPtr
-  memory_strategy_ RCPPUTILS_TSA_PT_GUARDED_BY(mutex_);
-
   /// The context associated with this executor.
   std::shared_ptr<rclcpp::Context> context_;
 
@@ -562,39 +498,31 @@ class Executor
   virtual void
   spin_once_impl(std::chrono::nanoseconds timeout);
 
-  typedef std::map<rclcpp::node_interfaces::NodeBaseInterface::WeakPtr,
-      const rclcpp::GuardCondition *,
-      std::owner_less<rclcpp::node_interfaces::NodeBaseInterface::WeakPtr>>
-    WeakNodesToGuardConditionsMap;
-
-  typedef std::map<rclcpp::CallbackGroup::WeakPtr,
-      const rclcpp::GuardCondition *,
-      std::owner_less<rclcpp::CallbackGroup::WeakPtr>>
-    WeakCallbackGroupsToGuardConditionsMap;
-
-  /// maps nodes to guard conditions
-  WeakNodesToGuardConditionsMap
-  weak_nodes_to_guard_conditions_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  /// Waitable containing guard conditions controlling the executor flow.
+  /**
+   * This waitable contains the interrupt and shutdown guard condition, as well
+   * as the guard condition associated with each node and callback group.
+   * By default, if any change is detected in the monitored entities, the notify
+   * waitable will wake the executor and rebuild the collections.
+   */
+  std::shared_ptr<rclcpp::executors::ExecutorNotifyWaitable> notify_waitable_;
 
-  /// maps callback groups to guard conditions
-  WeakCallbackGroupsToGuardConditionsMap
-  weak_groups_to_guard_conditions_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  std::atomic_bool entities_need_rebuild_;
 
-  /// maps callback groups associated to nodes
-  WeakCallbackGroupsToNodesMap
-  weak_groups_associated_with_executor_to_nodes_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  /// Collector used to associate executable entities from nodes and guard conditions
+  rclcpp::executors::ExecutorEntitiesCollector collector_;
 
-  /// maps callback groups to nodes associated with executor
-  WeakCallbackGroupsToNodesMap
-  weak_groups_to_nodes_associated_with_executor_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  /// WaitSet to be waited on.
+  rclcpp::WaitSet wait_set_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  std::optional<rclcpp::WaitResult<rclcpp::WaitSet>> wait_result_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
 
-  /// maps all callback groups to nodes
-  WeakCallbackGroupsToNodesMap
-  weak_groups_to_nodes_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  /// Hold the current state of the collection being waited on by the waitset
+  rclcpp::executors::ExecutorEntitiesCollection current_collection_ RCPPUTILS_TSA_GUARDED_BY(
+    mutex_);
 
-  /// nodes that are associated with the executor
-  std::list<rclcpp::node_interfaces::NodeBaseInterface::WeakPtr>
-  weak_nodes_ RCPPUTILS_TSA_GUARDED_BY(mutex_);
+  /// Hold the current state of the notify waitable being waited on by the waitset
+  std::shared_ptr<rclcpp::executors::ExecutorNotifyWaitable> current_notify_waitable_
+    RCPPUTILS_TSA_GUARDED_BY(mutex_);
 
   /// shutdown callback handle registered to Context
   rclcpp::OnShutdownCallbackHandle shutdown_callback_handle_;
diff --git a/rclcpp/include/rclcpp/executors/executor_entities_collection.hpp b/rclcpp/include/rclcpp/executors/executor_entities_collection.hpp
index 166bb99119..517894a2a2 100644
--- a/rclcpp/include/rclcpp/executors/executor_entities_collection.hpp
+++ b/rclcpp/include/rclcpp/executors/executor_entities_collection.hpp
@@ -178,6 +178,12 @@ struct ExecutorEntitiesCollection
   /// Clear the entities collection
   void clear();
+
+  /// Remove entities that have expired weak ownership
+  /**
+   * \return The total number of removed entities
+   */
+  size_t remove_expired_entities();
 };
 
 /// Build an entities collection from callback groups
diff --git a/rclcpp/include/rclcpp/executors/executor_notify_waitable.hpp b/rclcpp/include/rclcpp/executors/executor_notify_waitable.hpp
index 88158952d9..2b43fecca1 100644
--- a/rclcpp/include/rclcpp/executors/executor_notify_waitable.hpp
+++ b/rclcpp/include/rclcpp/executors/executor_notify_waitable.hpp
@@ -48,11 +48,11 @@ class ExecutorNotifyWaitable : public rclcpp::Waitable
   ~ExecutorNotifyWaitable() override = default;
 
   RCLCPP_PUBLIC
-  ExecutorNotifyWaitable(const ExecutorNotifyWaitable & other);
+  ExecutorNotifyWaitable(ExecutorNotifyWaitable & other);
 
   RCLCPP_PUBLIC
-  ExecutorNotifyWaitable & operator=(const ExecutorNotifyWaitable & other);
+  ExecutorNotifyWaitable & operator=(ExecutorNotifyWaitable & other);
 
   /// Add conditions to the wait set
   /**
    * \param[in] wait_set wait set to add the guard condition to
@@ -88,6 +88,25 @@ class ExecutorNotifyWaitable : public rclcpp::Waitable
   std::shared_ptr<void>
   take_data() override;
 
+  /// Take the data from an entity ID so that it can be consumed with `execute`.
+  /**
+   * \param[in] id ID of the entity to take data from.
+   * \return If available, data to be used, otherwise nullptr
+   * \sa rclcpp::Waitable::take_data_by_entity_id
+   */
+  RCLCPP_PUBLIC
+  std::shared_ptr<void>
+  take_data_by_entity_id(size_t id) override;
+
+  /// Set a callback to be called whenever the waitable becomes ready.
+  /**
+   * \param[in] callback callback to set
+   * \sa rclcpp::Waitable::set_on_ready_callback
+   */
+  RCLCPP_PUBLIC
+  void
+  set_on_ready_callback(std::function<void(size_t, int)> callback) override;
+
   /// Add a guard condition to be waited on.
   /**
    * \param[in] guard_condition The guard condition to add.
@@ -96,13 +115,21 @@ class ExecutorNotifyWaitable : public rclcpp::Waitable
   void
   add_guard_condition(rclcpp::GuardCondition::WeakPtr guard_condition);
 
+  /// Unset any callback registered via set_on_ready_callback.
+  /**
+   * \sa rclcpp::Waitable::clear_on_ready_callback
+   */
+  RCLCPP_PUBLIC
+  void
+  clear_on_ready_callback() override;
+
   /// Remove a guard condition from being waited on.
   /**
-   * \param[in] guard_condition The guard condition to remove.
+   * \param[in] weak_guard_condition The guard condition to remove.
*/ RCLCPP_PUBLIC void - remove_guard_condition(rclcpp::GuardCondition::WeakPtr guard_condition); + remove_guard_condition(rclcpp::GuardCondition::WeakPtr weak_guard_condition); /// Get the number of ready guard_conditions /** @@ -118,6 +145,8 @@ class ExecutorNotifyWaitable : public rclcpp::Waitable std::mutex guard_condition_mutex_; + std::function on_ready_callback_; + /// The collection of guard conditions to be waited on. std::set> notify_guard_conditions_; diff --git a/rclcpp/include/rclcpp/executors/static_executor_entities_collector.hpp b/rclcpp/include/rclcpp/executors/static_executor_entities_collector.hpp deleted file mode 100644 index f9fd2ff672..0000000000 --- a/rclcpp/include/rclcpp/executors/static_executor_entities_collector.hpp +++ /dev/null @@ -1,357 +0,0 @@ -// Copyright 2020 Open Source Robotics Foundation, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef RCLCPP__EXECUTORS__STATIC_EXECUTOR_ENTITIES_COLLECTOR_HPP_ -#define RCLCPP__EXECUTORS__STATIC_EXECUTOR_ENTITIES_COLLECTOR_HPP_ - -#include -#include -#include -#include -#include - -#include "rcl/guard_condition.h" -#include "rcl/wait.h" - -#include "rclcpp/experimental/executable_list.hpp" -#include "rclcpp/macros.hpp" -#include "rclcpp/memory_strategy.hpp" -#include "rclcpp/visibility_control.hpp" -#include "rclcpp/waitable.hpp" - -namespace rclcpp -{ -namespace executors -{ -typedef std::map> WeakCallbackGroupsToNodesMap; - -class StaticExecutorEntitiesCollector final - : public rclcpp::Waitable, - public std::enable_shared_from_this -{ -public: - RCLCPP_SMART_PTR_DEFINITIONS(StaticExecutorEntitiesCollector) - - // Constructor - RCLCPP_PUBLIC - StaticExecutorEntitiesCollector() = default; - - // Destructor - RCLCPP_PUBLIC - ~StaticExecutorEntitiesCollector(); - - /// Initialize StaticExecutorEntitiesCollector - /** - * \param p_wait_set A reference to the wait set to be used in the executor - * \param memory_strategy Shared pointer to the memory strategy to set. - * \throws std::runtime_error if memory strategy is null - */ - RCLCPP_PUBLIC - void - init( - rcl_wait_set_t * p_wait_set, - rclcpp::memory_strategy::MemoryStrategy::SharedPtr memory_strategy); - - /// Finalize StaticExecutorEntitiesCollector to clear resources - RCLCPP_PUBLIC - bool - is_init() {return initialized_;} - - RCLCPP_PUBLIC - void - fini(); - - /// Execute the waitable. - RCLCPP_PUBLIC - void - execute(std::shared_ptr & data) override; - - /// Take the data so that it can be consumed with `execute`. - /** - * For `StaticExecutorEntitiesCollector`, this always return `nullptr`. - * \sa rclcpp::Waitable::take_data() - */ - RCLCPP_PUBLIC - std::shared_ptr - take_data() override; - - /// Function to add_handles_to_wait_set and wait for work and - /** - * block until the wait set is ready or until the timeout has been exceeded. - * \throws std::runtime_error if wait set couldn't be cleared or filled. 
- * \throws any rcl errors from rcl_wait, \see rclcpp::exceptions::throw_from_rcl_error() - */ - RCLCPP_PUBLIC - void - refresh_wait_set(std::chrono::nanoseconds timeout = std::chrono::nanoseconds(-1)); - - /** - * \throws std::runtime_error if it couldn't add guard condition to wait set - */ - RCLCPP_PUBLIC - void - add_to_wait_set(rcl_wait_set_t * wait_set) override; - - RCLCPP_PUBLIC - size_t - get_number_of_ready_guard_conditions() override; - - /// Add a callback group to an executor. - /** - * \see rclcpp::Executor::add_callback_group - */ - RCLCPP_PUBLIC - bool - add_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr); - - /// Add a callback group to an executor. - /** - * \see rclcpp::Executor::add_callback_group - * \return boolean whether the node from the callback group is new - */ - RCLCPP_PUBLIC - bool - add_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - WeakCallbackGroupsToNodesMap & weak_groups_to_nodes); - - /// Remove a callback group from the executor. - /** - * \see rclcpp::Executor::remove_callback_group - */ - RCLCPP_PUBLIC - bool - remove_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr); - - /// Remove a callback group from the executor. - /** - * \see rclcpp::Executor::remove_callback_group_from_map - */ - RCLCPP_PUBLIC - bool - remove_callback_group_from_map( - rclcpp::CallbackGroup::SharedPtr group_ptr, - WeakCallbackGroupsToNodesMap & weak_groups_to_nodes); - - /** - * \see rclcpp::Executor::add_node() - * \throw std::runtime_error if node was already added - */ - RCLCPP_PUBLIC - bool - add_node( - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr); - - /** - * \see rclcpp::Executor::remove_node() - * \throw std::runtime_error if no guard condition is associated with node. - */ - RCLCPP_PUBLIC - bool - remove_node( - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr); - - RCLCPP_PUBLIC - std::vector - get_all_callback_groups(); - - /// Get callback groups that belong to executor. - /** - * \see rclcpp::Executor::get_manually_added_callback_groups() - */ - RCLCPP_PUBLIC - std::vector - get_manually_added_callback_groups(); - - /// Get callback groups that belong to executor. - /** - * \see rclcpp::Executor::get_automatically_added_callback_groups_from_nodes() - */ - RCLCPP_PUBLIC - std::vector - get_automatically_added_callback_groups_from_nodes(); - - /// Complete all available queued work without blocking. - /** - * This function checks if after the guard condition was triggered - * (or a spurious wakeup happened) we are really ready to execute - * i.e. 
re-collect entities - */ - RCLCPP_PUBLIC - bool - is_ready(rcl_wait_set_t * wait_set) override; - - /// Return number of timers - /** - * \return number of timers - */ - RCLCPP_PUBLIC - size_t - get_number_of_timers() {return exec_list_.number_of_timers;} - - /// Return number of subscriptions - /** - * \return number of subscriptions - */ - RCLCPP_PUBLIC - size_t - get_number_of_subscriptions() {return exec_list_.number_of_subscriptions;} - - /// Return number of services - /** - * \return number of services - */ - RCLCPP_PUBLIC - size_t - get_number_of_services() {return exec_list_.number_of_services;} - - /// Return number of clients - /** - * \return number of clients - */ - RCLCPP_PUBLIC - size_t - get_number_of_clients() {return exec_list_.number_of_clients;} - - /// Return number of waitables - /** - * \return number of waitables - */ - RCLCPP_PUBLIC - size_t - get_number_of_waitables() {return exec_list_.number_of_waitables;} - - /** Return a SubscritionBase Sharedptr by index. - * \param[in] i The index of the SubscritionBase - * \return a SubscritionBase shared pointer - * \throws std::out_of_range if the argument is higher than the size of the structrue. - */ - RCLCPP_PUBLIC - rclcpp::SubscriptionBase::SharedPtr - get_subscription(size_t i) {return exec_list_.subscription[i];} - - /** Return a TimerBase Sharedptr by index. - * \param[in] i The index of the TimerBase - * \return a TimerBase shared pointer - * \throws std::out_of_range if the argument is higher than the size. - */ - RCLCPP_PUBLIC - rclcpp::TimerBase::SharedPtr - get_timer(size_t i) {return exec_list_.timer[i];} - - /** Return a ServiceBase Sharedptr by index. - * \param[in] i The index of the ServiceBase - * \return a ServiceBase shared pointer - * \throws std::out_of_range if the argument is higher than the size. - */ - RCLCPP_PUBLIC - rclcpp::ServiceBase::SharedPtr - get_service(size_t i) {return exec_list_.service[i];} - - /** Return a ClientBase Sharedptr by index - * \param[in] i The index of the ClientBase - * \return a ClientBase shared pointer - * \throws std::out_of_range if the argument is higher than the size. - */ - RCLCPP_PUBLIC - rclcpp::ClientBase::SharedPtr - get_client(size_t i) {return exec_list_.client[i];} - - /** Return a Waitable Sharedptr by index - * \param[in] i The index of the Waitable - * \return a Waitable shared pointer - * \throws std::out_of_range if the argument is higher than the size. - */ - RCLCPP_PUBLIC - rclcpp::Waitable::SharedPtr - get_waitable(size_t i) {return exec_list_.waitable[i];} - -private: - /// Function to reallocate space for entities in the wait set. - /** - * \throws std::runtime_error if wait set couldn't be cleared or resized. 
- */ - void - prepare_wait_set(); - - void - fill_executable_list(); - - void - fill_memory_strategy(); - - /// Return true if the node belongs to the collector - /** - * \param[in] node_ptr a node base interface shared pointer - * \param[in] weak_groups_to_nodes map to nodes to lookup - * \return boolean whether a node belongs the collector - */ - bool - has_node( - const rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - const WeakCallbackGroupsToNodesMap & weak_groups_to_nodes) const; - - /// Add all callback groups that can be automatically added by any executor - /// and is not already associated with an executor from nodes - /// that are associated with executor - /** - * \see rclcpp::Executor::add_callback_groups_from_nodes_associated_to_executor() - */ - void - add_callback_groups_from_nodes_associated_to_executor(); - - void - fill_executable_list_from_map(const WeakCallbackGroupsToNodesMap & weak_groups_to_nodes); - - /// Memory strategy: an interface for handling user-defined memory allocation strategies. - rclcpp::memory_strategy::MemoryStrategy::SharedPtr memory_strategy_; - - // maps callback groups to nodes. - WeakCallbackGroupsToNodesMap weak_groups_associated_with_executor_to_nodes_; - // maps callback groups to nodes. - WeakCallbackGroupsToNodesMap weak_groups_to_nodes_associated_with_executor_; - - typedef std::map> - WeakNodesToGuardConditionsMap; - WeakNodesToGuardConditionsMap weak_nodes_to_guard_conditions_; - - /// List of weak nodes registered in the static executor - std::list weak_nodes_; - - // Mutex to protect vector of new nodes. - std::mutex new_nodes_mutex_; - std::vector new_nodes_; - - /// Wait set for managing entities that the rmw layer waits on. - rcl_wait_set_t * p_wait_set_ = nullptr; - - /// Executable list: timers, subscribers, clients, services and waitables - rclcpp::experimental::ExecutableList exec_list_; - - /// Bool to check if the entities collector has been initialized - bool initialized_ = false; -}; - -} // namespace executors -} // namespace rclcpp - -#endif // RCLCPP__EXECUTORS__STATIC_EXECUTOR_ENTITIES_COLLECTOR_HPP_ diff --git a/rclcpp/include/rclcpp/executors/static_single_threaded_executor.hpp b/rclcpp/include/rclcpp/executors/static_single_threaded_executor.hpp index 5294605eaf..6f22909caf 100644 --- a/rclcpp/include/rclcpp/executors/static_single_threaded_executor.hpp +++ b/rclcpp/include/rclcpp/executors/static_single_threaded_executor.hpp @@ -15,24 +15,13 @@ #ifndef RCLCPP__EXECUTORS__STATIC_SINGLE_THREADED_EXECUTOR_HPP_ #define RCLCPP__EXECUTORS__STATIC_SINGLE_THREADED_EXECUTOR_HPP_ +#include #include -#include -#include #include -#include -#include - -#include "rmw/rmw.h" #include "rclcpp/executor.hpp" -#include "rclcpp/executors/static_executor_entities_collector.hpp" -#include "rclcpp/experimental/executable_list.hpp" -#include "rclcpp/macros.hpp" -#include "rclcpp/memory_strategies.hpp" -#include "rclcpp/node.hpp" -#include "rclcpp/rate.hpp" -#include "rclcpp/utilities.hpp" -#include "rclcpp/visibility_control.hpp" +#include "rclcpp/executors/executor_entities_collection.hpp" +#include "rclcpp/executors/single_threaded_executor.hpp" namespace rclcpp { @@ -65,7 +54,7 @@ class StaticSingleThreadedExecutor : public rclcpp::Executor explicit StaticSingleThreadedExecutor( const rclcpp::ExecutorOptions & options = rclcpp::ExecutorOptions()); - /// Default destrcutor. + /// Default destructor. 
   RCLCPP_PUBLIC
   virtual ~StaticSingleThreadedExecutor();
 
@@ -116,105 +105,31 @@ class StaticSingleThreadedExecutor : public rclcpp::Executor
   void
   spin_all(std::chrono::nanoseconds max_duration) override;
 
-  /// Add a callback group to an executor.
-  /**
-   * \sa rclcpp::Executor::add_callback_group
-   */
-  RCLCPP_PUBLIC
-  void
-  add_callback_group(
-    rclcpp::CallbackGroup::SharedPtr group_ptr,
-    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
-    bool notify = true) override;
-
-  /// Remove callback group from the executor
-  /**
-   * \sa rclcpp::Executor::remove_callback_group
-   */
-  RCLCPP_PUBLIC
-  void
-  remove_callback_group(
-    rclcpp::CallbackGroup::SharedPtr group_ptr,
-    bool notify = true) override;
-
-  /// Add a node to the executor.
-  /**
-   * \sa rclcpp::Executor::add_node
-   */
-  RCLCPP_PUBLIC
-  void
-  add_node(
-    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
-    bool notify = true) override;
-
-  /// Convenience function which takes Node and forwards NodeBaseInterface.
-  /**
-   * \sa rclcpp::StaticSingleThreadedExecutor::add_node
-   */
-  RCLCPP_PUBLIC
-  void
-  add_node(std::shared_ptr<rclcpp::Node> node_ptr, bool notify = true) override;
-
-  /// Remove a node from the executor.
-  /**
-   * \sa rclcpp::Executor::remove_node
-   */
-  RCLCPP_PUBLIC
-  void
-  remove_node(
-    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
-    bool notify = true) override;
-
-  /// Convenience function which takes Node and forwards NodeBaseInterface.
-  /**
-   * \sa rclcpp::Executor::remove_node
-   */
-  RCLCPP_PUBLIC
-  void
-  remove_node(std::shared_ptr<rclcpp::Node> node_ptr, bool notify = true) override;
-
-  RCLCPP_PUBLIC
-  std::vector<rclcpp::CallbackGroup::WeakPtr>
-  get_all_callback_groups() override;
-
-  /// Get callback groups that belong to executor.
-  /**
-   * \sa rclcpp::Executor::get_manually_added_callback_groups()
-   */
-  RCLCPP_PUBLIC
-  std::vector<rclcpp::CallbackGroup::WeakPtr>
-  get_manually_added_callback_groups() override;
-
-  /// Get callback groups that belong to executor.
-  /**
-   * \sa rclcpp::Executor::get_automatically_added_callback_groups_from_nodes()
-   */
-  RCLCPP_PUBLIC
-  std::vector<rclcpp::CallbackGroup::WeakPtr>
-  get_automatically_added_callback_groups_from_nodes() override;
-
 protected:
   /**
    * @brief Executes ready executables from wait set.
+   * @param collection entities to evaluate for ready executables.
+   * @param wait_result result to check for ready executables.
    * @param spin_once if true executes only the first ready executable.
    * @return true if any executable was ready.
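// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): user-facing code is unchanged
// by the removal of the add/remove overrides above, since the executor now
// inherits them from rclcpp::Executor.
#include <memory>
#include "rclcpp/rclcpp.hpp"
#include "rclcpp/executors/static_single_threaded_executor.hpp"

int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  auto node = std::make_shared<rclcpp::Node>("example_node");
  rclcpp::executors::StaticSingleThreadedExecutor executor;
  executor.add_node(node);  // provided by the rclcpp::Executor base class
  executor.spin();
  rclcpp::shutdown();
  return 0;
}
// ---------------------------------------------------------------------------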
*/ - RCLCPP_PUBLIC bool - execute_ready_executables(bool spin_once = false); + execute_ready_executables( + const rclcpp::executors::ExecutorEntitiesCollection & collection, + rclcpp::WaitResult & wait_result, + bool spin_once); - RCLCPP_PUBLIC void spin_some_impl(std::chrono::nanoseconds max_duration, bool exhaustive); - RCLCPP_PUBLIC void spin_once_impl(std::chrono::nanoseconds timeout) override; + std::optional> + collect_and_wait(std::chrono::nanoseconds timeout); + private: RCLCPP_DISABLE_COPY(StaticSingleThreadedExecutor) - - StaticExecutorEntitiesCollector::SharedPtr entities_collector_; }; } // namespace executors diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/blockingconcurrentqueue.h b/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/blockingconcurrentqueue.h new file mode 100644 index 0000000000..3fcbd53bc5 --- /dev/null +++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/blockingconcurrentqueue.h @@ -0,0 +1,582 @@ +// Provides an efficient blocking version of moodycamel::ConcurrentQueue. +// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified +// BSD license, available at the top of concurrentqueue.h. +// Also dual-licensed under the Boost Software License (see LICENSE.md) +// Uses Jeff Preshing's semaphore implementation (under the terms of its +// separate zlib license, see lightweightsemaphore.h). + +#pragma once + +#include "concurrentqueue.h" +#include "lightweightsemaphore.h" + +#include +#include +#include +#include +#include + +namespace moodycamel +{ +// This is a blocking version of the queue. It has an almost identical interface to +// the normal non-blocking version, with the addition of various wait_dequeue() methods +// and the removal of producer-specific dequeue methods. +template +class BlockingConcurrentQueue +{ +private: + typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; + typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; + +public: + typedef typename ConcurrentQueue::producer_token_t producer_token_t; + typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; + + typedef typename ConcurrentQueue::index_t index_t; + typedef typename ConcurrentQueue::size_t size_t; + typedef typename std::make_signed::type ssize_t; + + static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; + static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). 
+ // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if ((details::likely)(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
+ inline bool enqueue(T&& item) + { + if ((details::likely)(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + if ((details::likely)(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if ((details::likely)(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. 
Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + + + // Blocks the current thread until there's something to dequeue, then + // dequeues it. 
+ // Never allocates. Thread-safe. + template + inline void wait_dequeue(U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(consumer_token_t& token, U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. 
+ // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
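// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): minimal producer/consumer use
// of the vendored queue's blocking API (`enqueue` plus `wait_dequeue_timed`,
// both documented above).
#include <chrono>
#include <iostream>
#include <thread>
#include "rclcpp/experimental/executors/events_executor/concurrent_queue/blockingconcurrentqueue.h"

int main()
{
  moodycamel::BlockingConcurrentQueue<int> queue;
  std::thread producer([&queue]() {
    for (int i = 0; i != 5; ++i) {
      queue.enqueue(i);  // signals the internal semaphore
    }
  });
  int item = 0;
  // Block up to 100 ms per item; stops once the producer is done and the
  // queue stays empty for a full timeout period.
  while (queue.wait_dequeue_timed(item, std::chrono::milliseconds(100))) {
    std::cout << "got " << item << "\n";
  }
  producer.join();
  return 0;
}
// ---------------------------------------------------------------------------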
+ static bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } + + +private: + template + static inline U* create(A1&& a1, A2&& a2) + { + void* p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + ConcurrentQueue inner; + std::unique_ptr sema; +}; + + +template +inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} // end namespace moodycamel diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/concurrentqueue.h b/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/concurrentqueue.h new file mode 100644 index 0000000000..d4b8e78e2f --- /dev/null +++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/concurrentqueue.h @@ -0,0 +1,3742 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. +// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2020, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. +#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { namespace details { + template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } + }; +} } +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. 
+	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
+} }
+#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
+namespace moodycamel { namespace details {
+	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
+
+	typedef std::thread::id thread_id_t;
+	static const thread_id_t invalid_thread_id;		// Default ctor creates invalid ID
+
+	// Note we don't define an invalid_thread_id2 since std::thread::id doesn't have one; it's
+	// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
+	// be.
+	static inline thread_id_t thread_id() { return std::this_thread::get_id(); }
+
+	template<std::size_t> struct thread_id_size { };
+	template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };
+	template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };
+
+	template<> struct thread_id_converter<thread_id_t> {
+		typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;
+#ifndef __APPLE__
+		typedef std::size_t thread_id_hash_t;
+#else
+		typedef thread_id_numeric_size_t thread_id_hash_t;
+#endif
+
+		static thread_id_hash_t prehash(thread_id_t const& x)
+		{
+#ifndef __APPLE__
+			return std::hash<std::thread::id>()(x);
+#else
+			return *reinterpret_cast<thread_id_hash_t const*>(&x);
+#endif
+		}
+	};
+} }
+#else
+// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
+// In order to get a numeric thread ID in a platform-independent way, we use a thread-local
+// static variable's address as a thread identifier :-)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define MOODYCAMEL_THREADLOCAL __thread
+#elif defined(_MSC_VER)
+#define MOODYCAMEL_THREADLOCAL __declspec(thread)
+#else
+// Assume C++11 compliant compiler
+#define MOODYCAMEL_THREADLOCAL thread_local
+#endif
+namespace moodycamel { namespace details {
+	typedef std::uintptr_t thread_id_t;
+	static const thread_id_t invalid_thread_id  = 0;	// Address can't be nullptr
+	static const thread_id_t invalid_thread_id2 = 1;	// Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
+	inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
+} }
+#endif
+
+// Constexpr if
+#ifndef MOODYCAMEL_CONSTEXPR_IF
+#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
+#define MOODYCAMEL_CONSTEXPR_IF if constexpr
+#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
+#else
+#define MOODYCAMEL_CONSTEXPR_IF if
+#define MOODYCAMEL_MAYBE_UNUSED
+#endif
+#endif
+
+// Exceptions
+#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
+#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
+#define MOODYCAMEL_EXCEPTIONS_ENABLED
+#endif
+#endif
+#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
+#define MOODYCAMEL_TRY try
+#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
+#define MOODYCAMEL_RETHROW throw
+#define MOODYCAMEL_THROW(expr) throw (expr)
+#else
+#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
+#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
+#define MOODYCAMEL_RETHROW
+#define MOODYCAMEL_THROW(expr)
+#endif
+
+#ifndef MOODYCAMEL_NOEXCEPT
+#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
+#define MOODYCAMEL_NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
+#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
+// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
+// We have to assume *all* non-trivial constructors may throw on VS2012!
+#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
+#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
+#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
+#else
+#define MOODYCAMEL_NOEXCEPT noexcept
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
+#endif
+#endif
+
+#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#ifdef MCDBGQ_USE_RELACY
+#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#else
+// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
+// g++ <=4.7 doesn't support thread_local either.
+// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
+// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
+//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // always disabled for now since several users report having problems with it on
+#endif
+#endif
+#endif
+
+// VS2012 doesn't support deleted functions.
+// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
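+// Illustrative sketch (not part of the vendored header): on pre-C++11 MSVC the
+// macro defined below expands to nothing, so copy operations marked with it are
+// declared but never defined, turning accidental copies into link errors rather
+// than compile errors. The `NonCopyable` name is hypothetical.
+//
+//     struct NonCopyable {
+//         NonCopyable() { }
+//         NonCopyable(NonCopyable const&) MOODYCAMEL_DELETE_FUNCTION;
+//         NonCopyable& operator=(NonCopyable const&) MOODYCAMEL_DELETE_FUNCTION;
+//     };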
+#ifndef MOODYCAMEL_DELETE_FUNCTION
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#define MOODYCAMEL_DELETE_FUNCTION
+#else
+#define MOODYCAMEL_DELETE_FUNCTION = delete
+#endif
+#endif
+
+namespace moodycamel { namespace details {
+#ifndef MOODYCAMEL_ALIGNAS
+// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
+#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
+	template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
+	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
+	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
+	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
+	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
+	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
+	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
+	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
+	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
+	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
+#else
+	template<typename T> struct identity { typedef T type; };
+#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
+#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
+#endif
+#endif
+} }
+
+
+// TSAN can falsely report races in lock-free code. To enable TSAN to be used from projects that use this one,
+// we can apply per-function compile-time suppression.
+// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
+#define MOODYCAMEL_NO_TSAN
+#if defined(__has_feature)
+ #if __has_feature(thread_sanitizer)
+  #undef MOODYCAMEL_NO_TSAN
+  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
+ #endif // TSAN
+#endif // TSAN
+
+// Compiler-specific likely/unlikely hints
+namespace moodycamel { namespace details {
+#if defined(__GNUC__)
+	static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
+	static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
+#else
+	static inline bool (likely)(bool x) { return x; }
+	static inline bool (unlikely)(bool x) { return x; }
+#endif
+} }
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+#include "internal/concurrentqueue_internal_debug.h"
+#endif
+
+namespace moodycamel {
+namespace details {
+	template<typename T>
+	struct const_numeric_max {
+		static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
+		static const T value = std::numeric_limits<T>::is_signed
+			? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
+			: static_cast<T>(-1);
+	};
+
+#if defined(__GLIBCXX__)
+	typedef ::max_align_t std_max_align_t;		// libstdc++ forgot to add it to std:: for a while
+#else
+	typedef std::max_align_t std_max_align_t;	// Others (e.g. MSVC) insist it can *only* be accessed via std::
+#endif
+
+	// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
+	// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
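+	// Illustrative sketch (not from the vendored header): because a union's
+	// alignment is the maximum of its members' alignments, the union below is
+	// at least as aligned as `long long` and `void*` even where the platform's
+	// max_align_t is mis-declared. That property can be spot-checked with:
+	//
+	//     static_assert(
+	//         std::alignment_of<moodycamel::details::max_align_t>::value >= std::alignment_of<long long>::value,
+	//         "max_align_t workaround should cover 8-byte scalars");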
+	typedef union {
+		std_max_align_t x;
+		long long y;
+		void* z;
+	} max_align_t;
+}
+
+// Default traits for the ConcurrentQueue. To change some of the
+// traits without re-implementing all of them, inherit from this
+// struct and shadow the declarations you wish to be different;
+// since the traits are used as a template type parameter, the
+// shadowed declarations will be used where defined, and the defaults
+// otherwise.
+struct ConcurrentQueueDefaultTraits
+{
+	// General-purpose size type. std::size_t is strongly recommended.
+	typedef std::size_t size_t;
+
+	// The type used for the enqueue and dequeue indices. Must be at least as
+	// large as size_t. Should be significantly larger than the number of elements
+	// you expect to hold at once, especially if you have a high turnover rate;
+	// for example, on 32-bit x86, if you expect to have over a hundred million
+	// elements or pump several million elements through your queue in a very
+	// short space of time, using a 32-bit type *may* trigger a race condition.
+	// A 64-bit int type is recommended in that case, and in practice will
+	// prevent a race condition no matter the usage of the queue. Note that
+	// whether the queue is lock-free with a 64-bit int type depends on whether
+	// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
+	typedef std::size_t index_t;
+
+	// Internally, all elements are enqueued and dequeued from multi-element
+	// blocks; this is the smallest controllable unit. If you expect few elements
+	// but many producers, a smaller block size should be favoured. For few producers
+	// and/or many elements, a larger block size is preferred. A sane default
+	// is provided. Must be a power of 2.
+	static const size_t BLOCK_SIZE = 32;
+
+	// For explicit producers (i.e. when using a producer token), the block is
+	// checked for being empty by iterating through a list of flags, one per element.
+	// For large block sizes, this is too inefficient, and switching to an atomic
+	// counter-based approach is faster. The switch is made for block sizes strictly
+	// larger than this threshold.
+	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
+
+	// How many full blocks can be expected for a single explicit producer? This should
+	// reflect that number's maximum for optimal performance. Must be a power of 2.
+	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
+
+	// How many full blocks can be expected for a single implicit producer? This should
+	// reflect that number's maximum for optimal performance. Must be a power of 2.
+	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
+
+	// The initial size of the hash table mapping thread IDs to implicit producers.
+	// Note that the hash is resized every time it becomes half full.
+	// Must be a power of two, and either 0 or at least 1. If 0, implicit production
+	// (using the enqueue methods without an explicit producer token) is disabled.
+	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
+
+	// Controls the number of items that an explicit consumer (i.e. one with a token)
+	// must consume before it causes all consumers to rotate and move on to the next
+	// internal queue.
+	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
+
+	// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
+	// Enqueue operations that would cause this limit to be surpassed will fail. Note
+	// that this limit is enforced at the block level (for performance reasons), i.e.
+	// it's rounded up to the nearest block size.
+	static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
+
+	// The number of times to spin before sleeping when waiting on a semaphore.
+	// Recommended values are on the order of 1000-10000 unless the number of
+	// consumer threads exceeds the number of idle cores (in which case try 0-100).
+	// Only affects instances of the BlockingConcurrentQueue.
+	static const int MAX_SEMA_SPINS = 0;
+
+
+#ifndef MCDBGQ_USE_RELACY
+	// Memory allocation can be customized if needed.
+	// malloc should return nullptr on failure, and handle alignment like std::malloc.
+#if defined(malloc) || defined(free)
+	// Gah, this is 2015, stop defining macros that break standard code already!
+	// Work around malloc/free being special macros:
+	static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
+	static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
+	static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
+	static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
+#else
+	static inline void* malloc(size_t size) { return std::malloc(size); }
+	static inline void free(void* ptr) { return std::free(ptr); }
+#endif
+#else
+	// Debug versions when running under the Relacy race detector (ignore
+	// these in user code)
+	static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
+	static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
+#endif
+};
+
+
+// When producing or consuming many elements, the most efficient way is to:
+//    1) Use one of the bulk-operation methods of the queue with a token
+//    2) Failing that, use the bulk-operation methods without a token
+//    3) Failing that, create a token and use that with the single-item methods
+//    4) Failing that, use the single-parameter methods of the queue
+// Having said that, don't create tokens willy-nilly -- ideally there should be
+// a maximum of one token per thread (of each kind).
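+// Illustrative sketch (not from the vendored header) of the two points above:
+// a traits struct that shadows a single default, and token-based bulk
+// operations. The names and values here are hypothetical.
+//
+//     #include "concurrentqueue.h"
+//
+//     struct BigBlockTraits : public moodycamel::ConcurrentQueueDefaultTraits {
+//         static const size_t BLOCK_SIZE = 256;  // must remain a power of 2
+//     };
+//
+//     moodycamel::ConcurrentQueue<int, BigBlockTraits> q;
+//     moodycamel::ProducerToken ptok(q);
+//     moodycamel::ConsumerToken ctok(q);
+//
+//     int in[64];
+//     for (int i = 0; i != 64; ++i) in[i] = i;
+//     q.enqueue_bulk(ptok, in, 64);                    // preferred path: bulk + token
+//
+//     int out[64];
+//     size_t n = q.try_dequeue_bulk(ctok, out, 64);    // n <= 64 items dequeued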
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details +{ + struct ConcurrentQueueProducerTypelessBase + { + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } + }; + + template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + + static inline size_t hash_thread_id(thread_id_t id) + { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) + { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4554) +#endif + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << static_cast(sizeof(T) * CHAR_BIT - 1)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic& left, std::atomic& right) + { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const& nomove(T const& x) + { + return x; + } + + template + struct nomove_if + { + template + static inline T const& eval(T const& x) + { + return x; + } + }; + + template<> + struct nomove_if + { + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template struct is_trivially_destructible : std::is_trivially_destructible { }; 
+#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + }; + + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + listener->next = tlsInst.tail; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template struct static_is_lock_free_num { enum { value = 0 }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; + template struct static_is_lock_free : static_is_lock_free_num::type> { }; + template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; + template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; +} + + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // 
that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. + inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. 
+// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
+template<typename T, typename Traits>
+inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT;
+
+
+template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
+class ConcurrentQueue
+{
+public:
+	typedef ::moodycamel::ProducerToken producer_token_t;
+	typedef ::moodycamel::ConsumerToken consumer_token_t;
+
+	typedef typename Traits::index_t index_t;
+	typedef typename Traits::size_t size_t;
+
+	static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
+	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
+	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
+	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
+	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
+	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4307)	// + integral constant overflow (that's what the ternary expression is for!)
+#pragma warning(disable: 4309)	// static_cast: Truncation of constant value
+#endif
+	static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+	static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type");
+	static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");
+	static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");
+	static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
+	static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
+	static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
+	static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
+	static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
+	static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");
+
+public:
+	// Creates a queue with at least `capacity` element slots; note that the
+	// actual number of elements that can be inserted without additional memory
+	// allocation depends on the number of producers and the block size
(e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. 
+ ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return 
swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. 
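+	// Illustrative sketch (not from the vendored header): moving elements in
+	// with std::make_move_iterator, as the note above suggests. The `batch`
+	// name is hypothetical; <vector>, <string> and <iterator> are assumed to
+	// be included.
+	//
+	//     std::vector<std::string> batch(100, std::string("payload"));
+	//     moodycamel::ConcurrentQueue<std::string> q;
+	//     q.enqueue_bulk(std::make_move_iterator(batch.begin()), batch.size());
+	//     // batch's strings are left in a valid but moved-from state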
+ template + bool enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U& item) + { + // Instead of simply trying each producer in turn (which could cause needless contention on the first + // producer), we score them heuristically. 
+ size_t nonEmptyCount = 0; + ProducerBase* best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall throughput + // under contention, but will give more predictable results in single-threaded + // consumer scenarios. This is mostly only useful for internal unit tests. + // Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U& item) + { + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
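+	// Illustrative sketch (not from the vendored header): a consumer loop that
+	// reuses one token for the lifetime of the thread, as recommended above.
+	// `running` (a std::atomic<bool>) and `process` are hypothetical.
+	//
+	//     moodycamel::ConsumerToken tok(q);
+	//     int item;
+	//     while (running.load(std::memory_order_relaxed)) {
+	//         while (q.try_dequeue(tok, item)) {
+	//             process(item);
+	//         }
+	//     }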
+ template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
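+	// Illustrative sketch (not from the vendored header): draining in fixed-size
+	// batches amortizes per-item overhead. `handle` is hypothetical.
+	//
+	//     int buf[64];
+	//     moodycamel::ConsumerToken tok(q);
+	//     for (size_t n; (n = q.try_dequeue_bulk(tok, buf, 64)) != 0; ) {
+	//         for (size_t i = 0; i != n; ++i) {
+	//             handle(buf[i]);
+	//         }
+	//     }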
+ template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
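+	// Illustrative sketch (not from the vendored header): callers that require
+	// lock-freedom can check once at startup and fall back if needed.
+	//
+	//     if (!moodycamel::ConcurrentQueue<int>::is_lock_free()) {
+	//         // choose a different strategy on this platform
+	//     }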
+ static bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() : freeListHead(nullptr) { } + FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) + { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). 
+ // Returns true if the block is now empty (does not apply in explicit context). + template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + + private: + static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + std::atomic shouldBeOnFreeList; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#ifdef MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + + +#ifdef MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : + tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { } + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } + else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
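+ // [Editor's note, not upstream code] A worked example of the invariant above,
+ // assuming a hypothetical BLOCK_SIZE of 4: with headIndex == 6 and
+ // tailIndex == 9, the head is mid-block (6 & 3 == 2), so the block whose base
+ // index is 4 is the partially dequeued one, and only elements 6..8 still need
+ // their destructors run. The index scan below locates exactly that block.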
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + if (block->dynamicallyAllocated) { + destroy(block); + } + else { + this->parent->add_block_to_free_list(block); + } + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
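+ // [Editor's note, not upstream code] Illustration of the reuse path above,
+ // assuming a hypothetical BLOCK_SIZE of 4: an enqueue at
+ // currentTailIndex == 8 lands on a block boundary (8 & 3 == 0); if the
+ // circularly-next block has since been fully drained, it is recycled in
+ // place rather than requisitioning a fresh block from the pool.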
+ } + else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock;
+           MOODYCAMEL_RETHROW;
+         }
+       }
+       else {
+         (void)startBlock;
+         (void)originalBlockIndexSlotsUsed;
+       }
+
+       // Add block to block index
+       auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
+       entry.base = currentTailIndex;
+       entry.block = this->tailBlock;
+       blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
+       pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
+
+       MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
+         this->tailIndex.store(newTailIndex, std::memory_order_release);
+         return true;
+       }
+     }
+
+     // Enqueue
+     new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
+
+     this->tailIndex.store(newTailIndex, std::memory_order_release);
+     return true;
+   }
+
+   template<typename U>
+   bool dequeue(U& element)
+   {
+     auto tail = this->tailIndex.load(std::memory_order_relaxed);
+     auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
+     if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
+       // Might be something to dequeue, let's give it a try
+
+       // Note that this if is purely for performance purposes in the common case when the queue is
+       // empty and the values are eventually consistent -- we may enter here spuriously.
+
+       // Note that whatever the values of overcommit and tail are, they are not going to change (unless we
+       // change them) and must be the same value at this point (inside the if) as when the if condition was
+       // evaluated.
+
+       // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
+       // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptimisticCount in
+       // the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
+       // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
+       // read-modify-write operations are guaranteed to work on the latest value in the modification order), but
+       // unfortunately that can't be shown to be correct using only the C++11 standard.
+       // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
+       std::atomic_thread_fence(std::memory_order_acquire);
+
+       // Increment optimistic counter, then check if it went over the boundary
+       auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
+
+       // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
+       // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
+       // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
+       // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
+       // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently)
+       // overflow; in such a case, though, the logic still holds since the difference between the two is maintained.
+
+       // Note that we reload tail here in case it changed; it will be the same value as before or greater, since
+       // this load is sequenced after (happens after) the earlier load above. This is supported by read-read
+       // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
+       tail = this->tailIndex.load(std::memory_order_acquire);
+       if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
+         // Guaranteed to be at least one element to dequeue!
+
+         // Get the index. Note that since there's guaranteed to be at least one element, this
+         // will never exceed tail. We need to do an acquire-release fence here since it's possible
+         // that whatever condition got us to this point was for an earlier enqueued element (that
+         // we already see the memory effects for), but that by the time we increment somebody else
+         // has incremented it, and we need to see the memory effects for *that* element, which
+         // in such a case is necessarily visible on the thread that incremented it in the first
+         // place with the more current condition (they must have acquired a tail that is at least
+         // as recent).
+         auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
+
+
+         // Determine which block the element is in
+
+         auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
+         auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
+
+         // We need to be careful here about subtracting and dividing because of index wrap-around.
+         // When an index wraps, we need to preserve the sign of the offset when dividing it by the
+         // block size (in order to get a correct signed block count offset in all cases):
+         auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
+         auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
+         auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
+         auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
+
+         // Dequeue
+         auto& el = *((*block)[index]);
+         if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
+           // Make sure the element is still fully dequeued and destroyed even if the assignment
+           // throws
+           struct Guard {
+             Block* block;
+             index_t index;
+
+             ~Guard()
+             {
+               (*block)[index]->~T();
+               block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
+             }
+           } guard = { block, index };
+
+           element = std::move(el); // NOLINT
+         }
+         else {
+           element = std::move(el); // NOLINT
+           el.~T(); // NOLINT
+           block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
+         }
+
+         return true;
+       }
+       else {
+         // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
+         this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
+       }
+     }
+
+     return false;
+   }
+
+   template<AllocationMode allocMode, typename It>
+   bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
+   {
+     // First, we need to make sure we have enough room to enqueue all of the elements;
+     // this means pre-allocating blocks and putting them in the block index (but only if
+     // all the allocations succeeded).
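+ // [Editor's note, not upstream code] A sketch of the sizing arithmetic below,
+ // assuming a hypothetical BLOCK_SIZE of 4: for startTailIndex == 6 and
+ // count == 7 the last written index is 12, so
+ // blockBaseDiff = (12 & ~3) - (5 & ~3) = 12 - 4 = 8, i.e. two further blocks
+ // must be secured (reused or allocated) before any element is constructed.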
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer 
only -- consumer must use the ones referenced by blockIndex
+   size_t pr_blockIndexSlotsUsed;
+   size_t pr_blockIndexSize;
+   size_t pr_blockIndexFront; // Next slot (not current)
+   BlockIndexEntry* pr_blockIndexEntries;
+   void* pr_blockIndexRaw;
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+ public:
+   ExplicitProducer* nextExplicitProducer;
+ private:
+#endif
+
+#ifdef MCDBGQ_TRACKMEM
+   friend struct MemStats;
+#endif
+ };
+
+
+ //////////////////////////////////
+ // Implicit queue
+ //////////////////////////////////
+
+ struct ImplicitProducer : public ProducerBase
+ {
+   ImplicitProducer(ConcurrentQueue* parent_) :
+     ProducerBase(parent_, false),
+     nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
+     blockIndex(nullptr)
+   {
+     new_block_index();
+   }
+
+   ~ImplicitProducer()
+   {
+     // Note that since we're in the destructor we can assume that all enqueue/dequeue operations
+     // completed already; this means that all undequeued elements are placed contiguously across
+     // contiguous blocks, and that only the first and last remaining blocks can be only partially
+     // empty (all other remaining blocks must be completely full).
+
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+     // Unregister ourselves for thread termination notification
+     if (!this->inactive.load(std::memory_order_relaxed)) {
+       details::ThreadExitNotifier::unsubscribe(&threadExitListener);
+     }
+#endif
+
+     // Destroy all remaining elements!
+     auto tail = this->tailIndex.load(std::memory_order_relaxed);
+     auto index = this->headIndex.load(std::memory_order_relaxed);
+     Block* block = nullptr;
+     assert(index == tail || details::circular_less_than<index_t>(index, tail));
+     bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed
+     while (index != tail) {
+       if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
+         if (block != nullptr) {
+           // Free the old block
+           this->parent->add_block_to_free_list(block);
+         }
+
+         block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
+       }
+
+       ((*block)[index])->~T();
+       ++index;
+     }
+     // Even if the queue is empty, there's still one block that's not on the free list
+     // (unless the head index reached the end of it, in which case the tail will be poised
+     // to create a new block).
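+ // [Editor's note, not upstream code] Example, assuming a hypothetical
+ // BLOCK_SIZE of 4: after enqueueing and fully dequeueing 8 items,
+ // head == tail == 8 and (8 & 3) == 0, so the last block was already recycled
+ // when its final element was consumed and must not be freed again; with
+ // head == tail == 7 the tail block is still held and is released below.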
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } + else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
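+ // [Editor's note, not upstream code] Unlike the explicit producer above, the
+ // implicit producer keeps one block-index entry per block, so a partially
+ // completed bulk reservation can be unwound with rewind_block_index_tail()
+ // and the requisitioned blocks handed back to the shared pool; the explicit
+ // producer instead keeps any blocks it managed to link for later reuse.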
+ + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. + + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index()) { + return false; + } + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) / BLOCK_SIZE); + size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); + 
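+ // [Editor's note, not upstream code] Worked example of the signed division
+ // above, assuming a 32-bit index_t and a BLOCK_SIZE of 4: if tailBase has
+ // wrapped around to 4 while the looked-up block base is still 0xFFFFFFF8,
+ // the raw difference 0xFFFFFFF4 reads as -12 once made signed, so offset is
+ // -3 and the capacity mask selects the slot three entries behind tail, which
+ // is the intended result regardless of the wrap.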
assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + freeList.add(block); + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { + return create(); + } + else { + return nullptr; + } + } + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { + bool recycled; + return recycle_or_create_producer(isExplicit, recycled); + } + + ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + recycled = true; + return ptr; + } + } + } + + recycled = false; + return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + // Swap (assumes our implicit producer hash is 
initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
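+        // Lazy re-insertion (per the note above): linearly probe the main hash
+        // from the hashed thread ID, claim the first empty (or reusable) slot
+        // with a CAS on its key, and publish the value there so future lookups
+        // for this thread hit the main table directly.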
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + auto newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } + else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + bool recycled; + auto producer = static_cast(recycle_or_create_producer(false, recycled)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + if (recycled) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + } + +#ifdef 
MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from thread exit listeners + details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); + + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1; + probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* 
ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::free)(ptr); + else + (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); + } + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + U* p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) + return nullptr; + + for (size_t i = 0; i != count; ++i) + new (p + i) U(); + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0; ) + (p + --i)->~U(); + } + aligned_free(p); + } + + template + static inline U* create() + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) + p->~U(); + aligned_free(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} + +#if 
defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/lightweightsemaphore.h b/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/lightweightsemaphore.h new file mode 100644 index 0000000000..85e8f78d92 --- /dev/null +++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/concurrent_queue/lightweightsemaphore.h @@ -0,0 +1,411 @@ +// Provides an efficient implementation of a semaphore (LightweightSemaphore). +// This is an extension of Jeff Preshing's sempahore implementation (licensed +// under the terms of its separate zlib license) that has been adapted and +// extended by Cameron Desrochers. + +#pragma once + +#include // For std::size_t +#include +#include // For std::make_signed + +#if defined(_WIN32) +// Avoid including windows.h in a header; we only need a handful of +// items, so we'll redeclare them here (this is relatively safe since +// the API generally has to remain stable between Windows versions). +// I know this is an ugly hack but it still beats polluting the global +// namespace with thousands of generic names or adding a .cpp for nothing. +extern "C" { + struct _SECURITY_ATTRIBUTES; + __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); + __declspec(dllimport) int __stdcall CloseHandle(void* hObject); + __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); + __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); +} +#elif defined(__MACH__) +#include +#elif defined(__unix__) +#include +#endif + +namespace moodycamel +{ +namespace details +{ + +// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's +// portable + lightweight semaphore implementations, originally from +// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h +// LICENSE: +// Copyright (c) 2015 Jeff Preshing +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgement in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
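+// Three platform-specific Semaphore wrappers follow, chosen at compile time:
+// Win32 (CreateSemaphoreW), Mach (semaphore_create, for macOS/iOS), and POSIX
+// (sem_init); any other platform fails with the #error at the end of the chain.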
+#if defined(_WIN32) +class Semaphore +{ +private: + void* m_hSema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)); + } +}; +#elif defined(__MACH__) +//--------------------------------------------------------- +// Semaphore (Apple iOS and OSX) +// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html +//--------------------------------------------------------- +class Semaphore +{ +private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + (void)rc; + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + + void signal(int count) + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + } +}; +#elif defined(__unix__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux) +//--------------------------------------------------------- +class Semaphore +{ +private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + int rc = sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + (void)rc; + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do { + rc = sem_wait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + struct timespec ts; + const int usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; + clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += 
(time_t)(usecs / usecs_in_1_sec); + ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; + do { + rc = sem_timedwait(&m_sema, &ts); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() + { + while (sem_post(&m_sema) == -1); + } + + void signal(int count) + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1); + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif + +} // end namespace details + + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ +public: + typedef std::make_signed::type ssize_t; + +private: + std::atomic m_count; + details::Semaphore m_sema; + int m_maxSpins; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) + { + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount > 0) + return true; + if (timeout_usecs < 0) + { + if (m_sema.wait()) + return true; + } + if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) + return true; + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + return true; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return false; + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) + { + assert(max > 0); + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) + { + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + break; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return 0; + } + } + } + if (max > 1) + return 1 + tryWaitMany(max - 1); + return 1; + } + +public: + LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins) + { + assert(initialCount >= 0); + assert(maxSpins >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + bool wait() + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t timeout_usecs) + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, timeout_usecs); + return result; + } + + ssize_t waitMany(ssize_t max) + { + ssize_t result = waitMany(max, -1); + assert(result > 0); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + + std::size_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? static_cast(count) : 0; + } +}; + +} // end namespace moodycamel diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/events_executor.hpp b/rclcpp/include/rclcpp/experimental/executors/events_executor/events_executor.hpp new file mode 100644 index 0000000000..dd5b1ebe63 --- /dev/null +++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/events_executor.hpp @@ -0,0 +1,294 @@ +// Copyright 2023 iRobot Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_EXECUTOR_HPP_
+#define RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_EXECUTOR_HPP_
+
+#include
+#include
+#include
+#include
+
+#include "rclcpp/executor.hpp"
+#include "rclcpp/executors/executor_entities_collection.hpp"
+#include "rclcpp/executors/executor_entities_collector.hpp"
+#include "rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp"
+#include "rclcpp/experimental/executors/events_executor/events_queue.hpp"
+#include "rclcpp/experimental/executors/events_executor/simple_events_queue.hpp"
+#include "rclcpp/experimental/timers_manager.hpp"
+#include "rclcpp/node.hpp"
+
+namespace rclcpp
+{
+namespace experimental
+{
+namespace executors
+{
+
+/// Events executor implementation
+/**
+ * This executor uses an events queue and a timers manager to execute entities from its
+ * associated nodes and callback groups.
+ * ROS 2 entities allow setting callback functions that are invoked when the entity is
+ * triggered or has work to do. The events executor sets these callbacks such that they
+ * push an event into its queue.
+ *
+ * This executor tries to keep maintenance operations to a minimum.
+ * This makes it possible to use customized `EventsQueue` classes to achieve different
+ * goals, such as very low CPU usage, bounded memory requirements, or determinism.
+ *
+ * The executor uses a weak ownership model, locking entities only while executing
+ * their related events.
+ *
+ * To run this executor:
+ * rclcpp::experimental::executors::EventsExecutor executor;
+ * executor.add_node(node);
+ * executor.spin();
+ * executor.remove_node(node);
+ */
+class EventsExecutor : public rclcpp::Executor
+{
+  friend class EventsExecutorEntitiesCollector;
+
+public:
+  RCLCPP_SMART_PTR_DEFINITIONS(EventsExecutor)
+
+  /// Default constructor. See the default constructor for Executor.
+  /**
+   * \param[in] events_queue The queue used to store events.
+   * \param[in] execute_timers_separate_thread If true, timers are executed in a separate
+   * thread. If false, timers are executed in the same thread as all other entities.
+   * \param[in] options Options used to configure the executor.
+   */
+  RCLCPP_PUBLIC
+  explicit EventsExecutor(
+    rclcpp::experimental::executors::EventsQueue::UniquePtr events_queue = std::make_unique<
+      rclcpp::experimental::executors::SimpleEventsQueue>(),
+    bool execute_timers_separate_thread = false,
+    const rclcpp::ExecutorOptions & options = rclcpp::ExecutorOptions());
+
+  /// Default destructor.
+  RCLCPP_PUBLIC
+  virtual ~EventsExecutor();
+
+  /// Events executor implementation of spin.
+  /**
+   * This function will block until work comes in, execute it, and keep blocking.
+   * It will only be interrupted by a CTRL-C (managed by the global signal handler).
+   * \throws std::runtime_error when spin() is called while already spinning
+   */
+  RCLCPP_PUBLIC
+  void
+  spin() override;
+
+  /// Events executor implementation of spin some
+  /**
+   * This non-blocking function will execute the timers and events
+   * that were ready when this API was called, until the timeout expires
+   * or no more work is available. Timers/events that become ready while
+   * work is being executed won't be taken into account here.
+   *
+   * Example:
+   *   while(condition) {
+   *     spin_some();
+   *     sleep();  // User should have some sync work or
+   *               // sleep to avoid a 100% CPU usage
+   *   }
+   */
+  RCLCPP_PUBLIC
+  void
+  spin_some(std::chrono::nanoseconds max_duration = std::chrono::nanoseconds(0)) override;
+
+  /// Events executor implementation of spin all
+  /**
+   * This non-blocking function will execute timers and events
+   * until the timeout expires or no more work is available. If new
+   * timers/events become ready while work is being executed, they will
+   * also be executed, as long as the timeout hasn't expired.
+   *
+   * Example:
+   *   while(condition) {
+   *     spin_all();
+   *     sleep();  // User should have some sync work or
+   *               // sleep to avoid a 100% CPU usage
+   *   }
+   */
+  RCLCPP_PUBLIC
+  void
+  spin_all(std::chrono::nanoseconds max_duration) override;
+
+  /// Add a node to the executor.
+  /**
+   * \sa rclcpp::Executor::add_node
+   */
+  RCLCPP_PUBLIC
+  void
+  add_node(
+    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
+    bool notify = true) override;
+
+  /// Convenience function which takes Node and forwards NodeBaseInterface.
+  /**
+   * \sa rclcpp::EventsExecutor::add_node
+   */
+  RCLCPP_PUBLIC
+  void
+  add_node(std::shared_ptr<rclcpp::Node> node_ptr, bool notify = true) override;
+
+  /// Remove a node from the executor.
+  /**
+   * \sa rclcpp::Executor::remove_node
+   */
+  RCLCPP_PUBLIC
+  void
+  remove_node(
+    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
+    bool notify = true) override;
+
+  /// Convenience function which takes Node and forwards NodeBaseInterface.
+  /**
+   * \sa rclcpp::Executor::remove_node
+   */
+  RCLCPP_PUBLIC
+  void
+  remove_node(std::shared_ptr<rclcpp::Node> node_ptr, bool notify = true) override;
+
+  /// Add a callback group to an executor.
+  /**
+   * \sa rclcpp::Executor::add_callback_group
+   */
+  RCLCPP_PUBLIC
+  void
+  add_callback_group(
+    rclcpp::CallbackGroup::SharedPtr group_ptr,
+    rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr,
+    bool notify = true) override;
+
+  /// Remove callback group from the executor
+  /**
+   * \sa rclcpp::Executor::remove_callback_group
+   */
+  RCLCPP_PUBLIC
+  void
+  remove_callback_group(
+    rclcpp::CallbackGroup::SharedPtr group_ptr,
+    bool notify = true) override;
+
+  /// Get callback groups that belong to executor.
+  /**
+   * \sa rclcpp::Executor::get_all_callback_groups()
+   */
+  RCLCPP_PUBLIC
+  std::vector<rclcpp::CallbackGroup::WeakPtr>
+  get_all_callback_groups() override;
+
+  /// Get callback groups that belong to executor.
+  /**
+   * \sa rclcpp::Executor::get_manually_added_callback_groups()
+   */
+  RCLCPP_PUBLIC
+  std::vector<rclcpp::CallbackGroup::WeakPtr>
+  get_manually_added_callback_groups() override;
+
+  /// Get callback groups that belong to executor.
+  /**
+   * \sa rclcpp::Executor::get_automatically_added_callback_groups_from_nodes()
+   */
+  RCLCPP_PUBLIC
+  std::vector<rclcpp::CallbackGroup::WeakPtr>
+  get_automatically_added_callback_groups_from_nodes() override;
+
+protected:
+  /// Internal implementation of spin_once
+  RCLCPP_PUBLIC
+  void
+  spin_once_impl(std::chrono::nanoseconds timeout) override;
+
+  /// Internal implementation of spin_some
+  RCLCPP_PUBLIC
+  void
+  spin_some_impl(std::chrono::nanoseconds max_duration, bool exhaustive);
+
+private:
+  RCLCPP_DISABLE_COPY(EventsExecutor)
+
+  /// Execute a provided executor event if its associated entities are available
+  void
+  execute_event(const ExecutorEvent & event);
+
+  /// Collect entities from callback groups and refresh the current collection with them
+  void
+  refresh_current_collection_from_callback_groups();
+
+  /// Refresh the current collection using the provided new_collection
+  void
+  refresh_current_collection(const rclcpp::executors::ExecutorEntitiesCollection & new_collection);
+
+  /// Create a listener callback function for the provided entity
+  std::function<void(size_t)>
+  create_entity_callback(void * entity_key, ExecutorEventType type);
+
+  /// Create a listener callback function for the provided waitable entity
+  std::function<void(size_t, int)>
+  create_waitable_callback(const rclcpp::Waitable * waitable_id);
+
+  /// Utility to add the notify waitable to an entities collection
+  void
+  add_notify_waitable_to_collection(
+    rclcpp::executors::ExecutorEntitiesCollection::WaitableCollection & collection);
+
+  /// Searches for the provided entity_id in the collection and returns the entity if valid
+  template<typename CollectionType>
+  typename CollectionType::EntitySharedPtr
+  retrieve_entity(typename CollectionType::Key entity_id, CollectionType & collection)
+  {
+    // Check if the entity_id is in the collection
+    auto it = collection.find(entity_id);
+    if (it == collection.end()) {
+      return nullptr;
+    }
+
+    // Check if the entity associated with the entity_id is valid
+    // and remove it from the collection if it isn't
+    auto entity = it->second.entity.lock();
+    if (!entity) {
+      collection.erase(it);
+    }
+
+    // Return the retrieved entity (this can be a nullptr if the entity was not valid)
+    return entity;
+  }
+
+  /// Queue where entities can push events
+  rclcpp::experimental::executors::EventsQueue::UniquePtr events_queue_;
+
+  std::shared_ptr<rclcpp::executors::ExecutorEntitiesCollector> entities_collector_;
+  std::shared_ptr<rclcpp::executors::ExecutorNotifyWaitable> notify_waitable_;
+
+  /// Mutex to protect the current_entities_collection_
+  std::recursive_mutex collection_mutex_;
+  std::shared_ptr<rclcpp::executors::ExecutorEntitiesCollection> current_entities_collection_;
+
+  /// Flag used to reduce the number of unnecessary waitable events
+  std::atomic<bool> notify_waitable_event_pushed_ {false};
+
+  /// Timers manager used to track and/or execute associated timers
+  std::shared_ptr<rclcpp::experimental::TimersManager> timers_manager_;
+};
+
+} // namespace executors
+} // namespace experimental
+} // namespace rclcpp
+
+#endif  // RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_EXECUTOR_HPP_
diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp b/rclcpp/include/rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp
new file mode 100644
index 0000000000..0da641ea6e
--- /dev/null
+++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp
@@ -0,0 +1,49 @@
+// Copyright 2023 iRobot Corporation.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_EXECUTOR_EVENT_TYPES_HPP_
+#define RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_EXECUTOR_EVENT_TYPES_HPP_
+
+#include <memory>
+
+namespace rclcpp
+{
+namespace experimental
+{
+namespace executors
+{
+
+enum ExecutorEventType
+{
+  CLIENT_EVENT,
+  SUBSCRIPTION_EVENT,
+  SERVICE_EVENT,
+  TIMER_EVENT,
+  WAITABLE_EVENT
+};
+
+struct ExecutorEvent
+{
+  const void * entity_key;
+  std::shared_ptr<void> data;
+  int waitable_data;
+  ExecutorEventType type;
+  size_t num_events;
+};
+
+} // namespace executors
+} // namespace experimental
+} // namespace rclcpp
+
+#endif  // RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_EXECUTOR_EVENT_TYPES_HPP_
diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/events_queue.hpp b/rclcpp/include/rclcpp/experimental/executors/events_executor/events_queue.hpp
new file mode 100644
index 0000000000..24282d6027
--- /dev/null
+++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/events_queue.hpp
@@ -0,0 +1,100 @@
+// Copyright 2023 iRobot Corporation.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_QUEUE_HPP_
+#define RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_QUEUE_HPP_
+
+#include <chrono>
+
+#include "rclcpp/macros.hpp"
+#include "rclcpp/visibility_control.hpp"
+
+#include "rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp"
+
+namespace rclcpp
+{
+namespace experimental
+{
+namespace executors
+{
+
+/**
+ * @brief This abstract class can be used to implement different types of queues
+ * where `ExecutorEvent` can be stored.
+ * The derived classes should choose which underlying container to use and
+ * the strategy for pushing and popping events.
+ * For example, a queue implementation may be bounded or unbounded, with
+ * different pruning strategies.
+ * Implementations may or may not check the validity of events and decide how to handle
+ * the situation where an event is no longer valid (e.g. when a subscription's history
+ * cache overruns).
+ */
+class EventsQueue
+{
+public:
+  RCLCPP_SMART_PTR_ALIASES_ONLY(EventsQueue)
+
+  RCLCPP_PUBLIC
+  EventsQueue() = default;
+
+  /**
+   * @brief Destruct the object.
+   */
+  RCLCPP_PUBLIC
+  virtual ~EventsQueue() = default;
+
+  /**
+   * @brief Push an event into the queue.
+   * @param event The event to push into the queue.
+   */
+  RCLCPP_PUBLIC
+  virtual
+  void
+  enqueue(const rclcpp::experimental::executors::ExecutorEvent & event) = 0;
+
+  /**
+   * @brief Extracts an event from the queue, waiting up to the given timeout
+   * if none is available.
+   * @return true if an event was extracted, false if the timeout expired.
+   */
+  RCLCPP_PUBLIC
+  virtual
+  bool
+  dequeue(
+    rclcpp::experimental::executors::ExecutorEvent & event,
+    std::chrono::nanoseconds timeout = std::chrono::nanoseconds::max()) = 0;
+
+  /**
+   * @brief Test whether the queue is empty.
+   * @return true if the queue's size is 0, false otherwise.
+   */
+  RCLCPP_PUBLIC
+  virtual
+  bool
+  empty() const = 0;
+
+  /**
+   * @brief Returns the number of elements in the queue.
+   * @return the number of elements in the queue.
+   */
+  RCLCPP_PUBLIC
+  virtual
+  size_t
+  size() const = 0;
+};
+
+} // namespace executors
+} // namespace experimental
+} // namespace rclcpp
+
+#endif  // RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__EVENTS_QUEUE_HPP_
diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/lock_free_events_queue.hpp b/rclcpp/include/rclcpp/experimental/executors/events_executor/lock_free_events_queue.hpp
new file mode 100644
index 0000000000..ad8250f9b5
--- /dev/null
+++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/lock_free_events_queue.hpp
@@ -0,0 +1,107 @@
+// Copyright 2022-2024 iRobot Corporation. All Rights Reserved
+
+#ifndef RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__LOCK_FREE_EVENTS_QUEUE_HPP_
+#define RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__LOCK_FREE_EVENTS_QUEUE_HPP_
+
+#include "rclcpp/experimental/executors/events_executor/concurrent_queue/blockingconcurrentqueue.h"
+#include "rclcpp/experimental/executors/events_executor/events_queue.hpp"
+
+namespace rclcpp
+{
+namespace experimental
+{
+namespace executors
+{
+
+/**
+ * @brief This class implements an EventsQueue as a simple wrapper around
+ * blockingconcurrentqueue.h
+ * See https://github.com/cameron314/concurrentqueue
+ * It does not perform any checks on the size of the queue, which can grow
+ * unbounded without being pruned (the underlying queue offers options for this;
+ * see its documentation).
+ * This implementation is lock-free: producers and consumers can use the queue
+ * concurrently without the need for synchronization mechanisms. The use of this
+ * queue aims to fix the issue of publishers being blocked by the executor extracting
+ * events from the queue on a different thread, causing expensive mutex contention.
+ */
+class LockFreeEventsQueue : public EventsQueue
+{
+public:
+  RCLCPP_PUBLIC
+  ~LockFreeEventsQueue() override
+  {
+    // It's important that all threads have finished using the queue
+    // and the memory effects have fully propagated before it is destructed.
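+    // Draining the queue here destroys any never-dispatched events, eagerly
+    // releasing the shared data they still hold.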
+    // Consume all events
+    ExecutorEvent event;
+    while (event_queue_.try_dequeue(event)) {}
+  }
+
+  /**
+   * @brief Enqueue an event into the queue.
+   * @param event The event to enqueue into the queue.
+   */
+  RCLCPP_PUBLIC
+  void
+  enqueue(const ExecutorEvent & event) override
+  {
+    // Split an aggregated event (num_events > 1) into single events,
+    // so that each dequeue call returns exactly one of them.
+    ExecutorEvent single_event = event;
+    single_event.num_events = 1;
+    for (size_t ev = 0; ev < event.num_events; ev++) {
+      event_queue_.enqueue(single_event);
+    }
+  }
+
+  /**
+   * @brief Wait for an event, up to the given timeout.
+   * @return true if an event was dequeued, false if the timeout expired.
+   */
+  RCLCPP_PUBLIC
+  bool
+  dequeue(
+    rclcpp::experimental::executors::ExecutorEvent & event,
+    std::chrono::nanoseconds timeout = std::chrono::nanoseconds::max()) override
+  {
+    if (timeout != std::chrono::nanoseconds::max()) {
+      return event_queue_.wait_dequeue_timed(event, timeout);
+    }
+
+    // If no timeout specified, just wait for an event to arrive
+    event_queue_.wait_dequeue(event);
+    return true;
+  }
+
+  /**
+   * @brief Test whether the queue is empty.
+   * @return true if the queue's size is 0, false otherwise.
+   */
+  RCLCPP_PUBLIC
+  bool
+  empty() const override
+  {
+    return event_queue_.size_approx() == 0;
+  }
+
+  /**
+   * @brief Returns the number of elements in the queue.
+   * This estimate is only accurate if the queue has completely
+   * stabilized before it is called.
+   * @return the number of elements in the queue.
+   */
+  RCLCPP_PUBLIC
+  size_t
+  size() const override
+  {
+    return event_queue_.size_approx();
+  }
+
+private:
+  moodycamel::BlockingConcurrentQueue<ExecutorEvent> event_queue_;
+};
+
+} // namespace executors
+} // namespace experimental
+} // namespace rclcpp
+
+
+#endif  // RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__LOCK_FREE_EVENTS_QUEUE_HPP_
diff --git a/rclcpp/include/rclcpp/experimental/executors/events_executor/simple_events_queue.hpp b/rclcpp/include/rclcpp/experimental/executors/events_executor/simple_events_queue.hpp
new file mode 100644
index 0000000000..7b18a95fcf
--- /dev/null
+++ b/rclcpp/include/rclcpp/experimental/executors/events_executor/simple_events_queue.hpp
@@ -0,0 +1,134 @@
+// Copyright 2023 iRobot Corporation.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__SIMPLE_EVENTS_QUEUE_HPP_
+#define RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__SIMPLE_EVENTS_QUEUE_HPP_
+
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+
+#include "rclcpp/experimental/executors/events_executor/events_queue.hpp"
+
+namespace rclcpp
+{
+namespace experimental
+{
+namespace executors
+{
+
+/**
+ * @brief This class implements an EventsQueue as a simple wrapper around a std::queue.
+ * It does not perform any checks on the size of the queue, which can grow
+ * unbounded without being pruned.
+ * The simplicity of this implementation makes it suitable for optimizing CPU usage.
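+ *
+ * A minimal usage sketch: this queue can be passed explicitly when constructing
+ * an EventsExecutor (it is also the executor's default queue):
+ *   rclcpp::experimental::executors::EventsExecutor executor(
+ *     std::make_unique<rclcpp::experimental::executors::SimpleEventsQueue>());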
+ */
+class SimpleEventsQueue : public EventsQueue
+{
+public:
+  RCLCPP_PUBLIC
+  ~SimpleEventsQueue() override = default;
+
+  /**
+   * @brief Enqueue an event into the queue.
+   * Thread safe.
+   * @param event The event to enqueue into the queue.
+   */
+  RCLCPP_PUBLIC
+  void
+  enqueue(const rclcpp::experimental::executors::ExecutorEvent & event) override
+  {
+    rclcpp::experimental::executors::ExecutorEvent single_event = event;
+    single_event.num_events = 1;
+    {
+      std::unique_lock<std::mutex> lock(mutex_);
+      for (size_t ev = 0; ev < event.num_events; ev++) {
+        event_queue_.push(single_event);
+      }
+    }
+    events_queue_cv_.notify_one();
+  }
+
+  /**
+   * @brief Wait for an event, up to the given timeout; gets a single event.
+   * Thread safe.
+   * @return true if an event was dequeued, false if the timeout expired.
+   */
+  RCLCPP_PUBLIC
+  bool
+  dequeue(
+    rclcpp::experimental::executors::ExecutorEvent & event,
+    std::chrono::nanoseconds timeout = std::chrono::nanoseconds::max()) override
+  {
+    std::unique_lock<std::mutex> lock(mutex_);
+
+    // Initialize to true because it's only needed if we have a valid timeout
+    bool has_data = true;
+    if (timeout != std::chrono::nanoseconds::max()) {
+      has_data =
+        events_queue_cv_.wait_for(lock, timeout, [this]() {return !event_queue_.empty();});
+    } else {
+      events_queue_cv_.wait(lock, [this]() {return !event_queue_.empty();});
+    }
+
+    if (has_data) {
+      event = event_queue_.front();
+      event_queue_.pop();
+      return true;
+    }
+
+    return false;
+  }
+
+  /**
+   * @brief Test whether the queue is empty.
+   * Thread safe.
+   * @return true if the queue's size is 0, false otherwise.
+   */
+  RCLCPP_PUBLIC
+  bool
+  empty() const override
+  {
+    std::unique_lock<std::mutex> lock(mutex_);
+    return event_queue_.empty();
+  }
+
+  /**
+   * @brief Returns the number of elements in the queue.
+   * Thread safe.
+   * @return the number of elements in the queue.
+   */
+  RCLCPP_PUBLIC
+  size_t
+  size() const override
+  {
+    std::unique_lock<std::mutex> lock(mutex_);
+    return event_queue_.size();
+  }
+
+private:
+  // The underlying queue implementation
+  std::queue<rclcpp::experimental::executors::ExecutorEvent> event_queue_;
+  // Mutex to protect read/write access to the queue
+  mutable std::mutex mutex_;
+  // Variable used to notify when an event is added to the queue
+  std::condition_variable events_queue_cv_;
+};
+
+} // namespace executors
+} // namespace experimental
+} // namespace rclcpp
+
+#endif  // RCLCPP__EXPERIMENTAL__EXECUTORS__EVENTS_EXECUTOR__SIMPLE_EVENTS_QUEUE_HPP_
diff --git a/rclcpp/include/rclcpp/experimental/subscription_intra_process_buffer.hpp b/rclcpp/include/rclcpp/experimental/subscription_intra_process_buffer.hpp
index 3c71512677..3052cdbc49 100644
--- a/rclcpp/include/rclcpp/experimental/subscription_intra_process_buffer.hpp
+++ b/rclcpp/include/rclcpp/experimental/subscription_intra_process_buffer.hpp
@@ -30,6 +30,7 @@
 #include "rclcpp/experimental/ros_message_intra_process_buffer.hpp"
 #include "rclcpp/qos.hpp"
 #include "rclcpp/type_support_decl.hpp"
+#include "rclcpp/detail/add_guard_condition_to_rcl_wait_set.hpp"
 
 namespace rclcpp
 {
@@ -93,6 +94,15 @@ class SubscriptionIntraProcessBuffer : public SubscriptionROSMsgIntraProcessBuff
       std::make_shared(subscribed_type_allocator_));
   }
 
+  void
+  add_to_wait_set(rcl_wait_set_t * wait_set) override
+  {
+    if (this->buffer_->has_data()) {
+      this->trigger_guard_condition();
+    }
+    detail::add_guard_condition_to_rcl_wait_set(*wait_set, this->gc_);
+  }
+
   bool
   is_ready(rcl_wait_set_t * wait_set) override
   {
diff --git a/rclcpp/include/rclcpp/experimental/timers_manager.hpp b/rclcpp/include/rclcpp/experimental/timers_manager.hpp
new file mode 100644
index 0000000000..af3337bfd6
--- /dev/null
+++ b/rclcpp/include/rclcpp/experimental/timers_manager.hpp
@@ -0,0 +1,558 @@
+// Copyright 2023 iRobot Corporation.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RCLCPP__EXPERIMENTAL__TIMERS_MANAGER_HPP_
+#define RCLCPP__EXPERIMENTAL__TIMERS_MANAGER_HPP_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "rclcpp/context.hpp"
+#include "rclcpp/timer.hpp"
+
+namespace rclcpp
+{
+namespace experimental
+{
+
+/**
+ * @brief This class provides a way to store and execute timer objects.
+ * It provides APIs to suit the needs of different applications and execution models.
+ * All public APIs provided by this class are thread-safe.
+ *
+ * Timers management
+ * This class provides APIs to add/remove timers to/from an internal storage.
+ * It keeps a list of weak pointers to the added timers, and locks them only when
+ * they need to be executed or modified.
+ * Timers are kept ordered in a binary-heap priority queue.
+ * Calls to the add/remove APIs will temporarily block the execution of the timers and
+ * require the internal priority queue to be reordered.
+ * Because of this, they have a non-negligible impact on performance.
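+ *
+ * A minimal usage sketch (assuming `timer` is an existing rclcpp::TimerBase::SharedPtr
+ * and the manager's own thread should execute the timers):
+ *   auto timers_manager = std::make_shared<rclcpp::experimental::TimersManager>(
+ *     rclcpp::contexts::get_global_default_context());
+ *   timers_manager->add_timer(timer);
+ *   timers_manager->start();  // spawns the monitoring/execution thread
+ *   // ...
+ *   timers_manager->stop();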
+ *
+ * Timers execution
+ * The most efficient use of this class consists of letting a TimersManager object
+ * spawn a thread where timers are monitored and optionally executed.
+ * This can be controlled via the `start` and `stop` methods.
+ * Ready timers can either be executed or an on_ready_callback can be used to notify
+ * other entities that they are ready and need to be executed.
+ * Other APIs allow directly executing a given timer.
+ *
+ * This class assumes that the `execute_callback()` API of the stored timers is never
+ * called by other entities; it may only be called from here.
+ * If this assumption is not respected, the heap property may be invalidated,
+ * so timers may be executed out of order, without this object noticing it.
+ *
+ */
+class TimersManager
+{
+public:
+  RCLCPP_SMART_PTR_DEFINITIONS_NOT_COPYABLE(TimersManager)
+
+  /**
+   * @brief Construct a new TimersManager object
+   *
+   * @param context custom context to be used.
+   * Shared ownership of the context is held until destruction.
+   * @param on_ready_callback The timers on-ready callback. This function determines
+   * what happens when the TimersManager is running and a timer becomes ready.
+   * The TimersManager is considered "running" when the `start` method has been called.
+   * If it's callable, it will be invoked instead of the timer callback.
+   * If it's not callable, then the TimersManager will
+   * directly execute timers when they are ready.
+   * All the methods that execute a given timer (e.g. `execute_head_timer`
+   * or `execute_ready_timer`) without the TimersManager being `running`, i.e.
+   * without actually explicitly waiting for the timer to become ready, will ignore this
+   * callback.
+   */
+  RCLCPP_PUBLIC
+  TimersManager(
+    std::shared_ptr<rclcpp::Context> context,
+    std::function<void(const rclcpp::TimerBase *, const std::shared_ptr<void> &)>
+    on_ready_callback = nullptr);
+
+  /**
+   * @brief Destruct the TimersManager object, making sure to stop the thread and
+   * release the memory.
+   */
+  RCLCPP_PUBLIC
+  ~TimersManager();
+
+  /**
+   * @brief Adds a new timer to the storage, maintaining weak ownership of it.
+   * This function is thread safe and can be called regardless of the state of the
+   * timers thread.
+   *
+   * @param timer the timer to add.
+   * @throws std::invalid_argument if timer is a nullptr.
+   */
+  RCLCPP_PUBLIC
+  void add_timer(rclcpp::TimerBase::SharedPtr timer);
+
+  /**
+   * @brief Remove a single timer from the object storage.
+   * Will do nothing if the timer was not being stored here.
+   * This function is thread safe and can be called regardless of the state of the
+   * timers thread.
+   *
+   * @param timer the timer to remove.
+   */
+  RCLCPP_PUBLIC
+  void remove_timer(rclcpp::TimerBase::SharedPtr timer);
+
+  /**
+   * @brief Remove all the timers stored in the object.
+   * This function is thread safe and can be called regardless of the state of the
+   * timers thread.
+   */
+  RCLCPP_PUBLIC
+  void clear();
+
+  /**
+   * @brief Starts a thread that takes care of executing the timers stored in this object.
+   * This function throws an exception if the timers thread was already running.
+   */
+  RCLCPP_PUBLIC
+  void start();
+
+  /**
+   * @brief Stops the timers thread.
+   * Will do nothing if the timers thread was not running.
+   */
+  RCLCPP_PUBLIC
+  void stop();
+
+  /**
+   * @brief Get the number of timers that are currently ready.
+   * This function is thread safe.
+   *
+   * @return size_t number of ready timers.
+   * @throws std::runtime_error if the timers thread was already running.
+   */
+  RCLCPP_PUBLIC
+  size_t get_number_ready_timers();
+
+  /**
+   * @brief Executes the head timer if ready.
+ * This function is thread safe. + * This function will try to execute the timer callback regardless of whether + * the TimersManager on_ready_callback was passed during construction. + * + * @return true if the head timer was ready. + * @throws std::runtime_error if the timers thread was already running. + */ + RCLCPP_PUBLIC + bool execute_head_timer(); + + /** + * @brief Executes the timer identified by its ID. + * This function is thread safe. + * This function will try to execute the timer callback regardless of whether + * the TimersManager on_ready_callback was passed during construction. + * + * @param timer_id the ID of the timer to execute + * @param data internal data of the timer + */ + RCLCPP_PUBLIC + void execute_ready_timer(const rclcpp::TimerBase * timer_id, const std::shared_ptr<void> & data); + + /** + * @brief Get the amount of time before the next timer triggers. + * This function is thread safe. + * + * @return std::optional<std::chrono::nanoseconds> to wait, + * the returned value could be negative if the timer is already expired + * or std::chrono::nanoseconds::max() if there are no timers stored in the object. + * If the head timer was cancelled, then this will return a nullopt. + * @throws std::runtime_error if the timers thread was already running. + */ + RCLCPP_PUBLIC + std::optional<std::chrono::nanoseconds> get_head_timeout(); + +private: + RCLCPP_DISABLE_COPY(TimersManager) + + using TimerPtr = rclcpp::TimerBase::SharedPtr; + using WeakTimerPtr = rclcpp::TimerBase::WeakPtr; + + // Forward declaration + class TimersHeap; + + /** + * @brief This class allows storing weak pointers to timers in a heap-like data structure. + * The root of the heap is the timer that triggers first. + * Since this class uses weak ownership, it is not guaranteed that it represents a valid heap + * at any point in time, as timers could go out of scope and invalidate it. + * The "validate_and_lock" API restores the heap property and returns a locked version + * of the timers heap. + * This class is not thread safe and requires external mutexes to protect its usage. + */ + class WeakTimersHeap + { +public: + /** + * @brief Add a new timer to the heap. After the addition, the heap property is enforced. + * + * @param timer new timer to add. + * @return true if timer has been added, false if it was already there. + */ + bool add_timer(TimerPtr timer) + { + TimersHeap locked_heap = this->validate_and_lock(); + bool added = locked_heap.add_timer(std::move(timer)); + + if (added) { + // Re-create the weak heap with the new timer added + this->store(locked_heap); + } + + return added; + } + + /** + * @brief Remove a timer from the heap. After the removal, the heap property is enforced. + * + * @param timer timer to remove. + * @return true if timer has been removed, false if it was not there. + */ + bool remove_timer(TimerPtr timer) + { + TimersHeap locked_heap = this->validate_and_lock(); + bool removed = locked_heap.remove_timer(std::move(timer)); + + if (removed) { + // Re-create the weak heap with the timer removed + this->store(locked_heap); + } + + return removed; + } + + /** + * @brief Retrieve the timer identified by the given ID. + * @param timer_id The ID of the timer to retrieve.
+ * @return TimerPtr if there's a timer associated with the ID, nullptr otherwise + */ + TimerPtr get_timer(const rclcpp::TimerBase * timer_id) + { + for (auto & weak_timer : weak_heap_) { + auto timer = weak_timer.lock(); + if (timer.get() == timer_id) { + return timer; + } + } + return nullptr; + } + + /** + * @brief Returns a const reference to the front element. + */ + const WeakTimerPtr & front() const + { + return weak_heap_.front(); + } + + /** + * @brief Returns whether the heap is empty or not. + */ + bool empty() const + { + return weak_heap_.empty(); + } + + /** + * @brief This function restores the current object as a valid heap + * and returns a locked version of it. + * Timers that went out of scope are removed from the container. + * It is the only public API to access and manipulate the stored timers. + * + * @return TimersHeap owned timers corresponding to the current object + */ + TimersHeap validate_and_lock() + { + TimersHeap locked_heap; + bool any_timer_destroyed = false; + + for (auto weak_timer : weak_heap_) { + auto timer = weak_timer.lock(); + if (timer) { + // This timer is valid, so add it to the locked heap + // Note: we access friend private `owned_heap_` member field. + locked_heap.owned_heap_.push_back(std::move(timer)); + } else { + // This timer went out of scope, so we don't add it to the locked heap + // and we mark the corresponding flag. + // There is no need to erase it from the weak heap, as we are going to re-heapify. + // Note: we can't exit from the loop here, as we need to find all valid timers. + any_timer_destroyed = true; + } + } + + // If a timer has gone out of scope, then the remaining elements do not represent + // a valid heap anymore. We need to re-heapify the timers heap. + if (any_timer_destroyed) { + locked_heap.heapify(); + // Re-create the weak heap now that elements have been heapified again + this->store(locked_heap); + } + + return locked_heap; + } + + /** + * @brief This function recreates the heap of weak pointers + * from a heap of owned pointers. + * It must be called after a locked TimersHeap generated from this object + * has been modified in any way (e.g. timers triggered, added, or removed). + * + * @param heap timers heap to store as weak pointers + */ + void store(const TimersHeap & heap) + { + weak_heap_.clear(); + // Note: we access friend private `owned_heap_` member field. + for (auto t : heap.owned_heap_) { + weak_heap_.push_back(t); + } + } + + /** + * @brief Remove all timers from the heap. + */ + void clear() + { + weak_heap_.clear(); + } + +private: + std::vector<WeakTimerPtr> weak_heap_; + }; + + /** + * @brief This class is the equivalent of WeakTimersHeap but with ownership of the timers. + * It can be generated by locking the weak version. + * It provides operations to manipulate the heap. + * This class is not thread safe and requires external mutexes to protect its usage. + */ + class TimersHeap + { +public: + /** + * @brief Try to add a new timer to the heap. + * After the addition, the heap property is preserved. + * @param timer new timer to add. + * @return true if timer has been added, false if it was already there.
+ */ + bool add_timer(TimerPtr timer) + { + // Nothing to do if the timer is already stored here + auto it = std::find(owned_heap_.begin(), owned_heap_.end(), timer); + if (it != owned_heap_.end()) { + return false; + } + + owned_heap_.push_back(std::move(timer)); + std::push_heap(owned_heap_.begin(), owned_heap_.end(), timer_greater); + + return true; + } + + /** + * @brief Try to remove a timer from the heap. + * After the removal, the heap property is preserved. + * @param timer timer to remove. + * @return true if timer has been removed, false if it was not there. + */ + bool remove_timer(TimerPtr timer) + { + // Nothing to do if the timer is not stored here + auto it = std::find(owned_heap_.begin(), owned_heap_.end(), timer); + if (it == owned_heap_.end()) { + return false; + } + + owned_heap_.erase(it); + this->heapify(); + + return true; + } + + /** + * @brief Returns a reference to the front element. + * @return reference to front element. + */ + TimerPtr & front() + { + return owned_heap_.front(); + } + + /** + * @brief Returns a const reference to the front element. + * @return const reference to front element. + */ + const TimerPtr & front() const + { + return owned_heap_.front(); + } + + /** + * @brief Returns whether the heap is empty or not. + * @return true if the heap is empty. + */ + bool empty() const + { + return owned_heap_.empty(); + } + + /** + * @brief Returns the size of the heap. + * @return the number of valid timers in the heap. + */ + size_t size() const + { + return owned_heap_.size(); + } + + /** + * @brief Get the number of timers that are currently ready. + * @return size_t number of ready timers. + */ + size_t get_number_ready_timers() const + { + size_t ready_timers = 0; + + for (TimerPtr t : owned_heap_) { + if (t->is_ready()) { + ready_timers++; + } + } + + return ready_timers; + } + + /** + * @brief Restore a valid heap after the root value has been replaced (e.g. timer triggered). + */ + void heapify_root() + { + // The following code is a more efficient version than doing + // pop_heap, pop_back, push_back, push_heap + // as it removes the need for the last push_heap + + // Push the modified element (i.e. the current root) at the bottom of the heap + owned_heap_.push_back(owned_heap_[0]); + // Exchange first and last-1 elements and reheapify + std::pop_heap(owned_heap_.begin(), owned_heap_.end(), timer_greater); + // Remove last element + owned_heap_.pop_back(); + } + + /** + * @brief Completely restores the structure to a valid heap + */ + void heapify() + { + std::make_heap(owned_heap_.begin(), owned_heap_.end(), timer_greater); + } + + /** + * @brief Helper function to clear the "on_reset_callback" on all associated timers. + */ + void clear_timers_on_reset_callbacks() + { + for (TimerPtr & t : owned_heap_) { + t->clear_on_reset_callback(); + } + } + + /** + * @brief Friend declaration to allow the `validate_and_lock()` function to access the + * underlying heap container + */ + friend TimersHeap WeakTimersHeap::validate_and_lock(); + + /** + * @brief Friend declaration to allow the `store()` function to access the + * underlying heap container + */ + friend void WeakTimersHeap::store(const TimersHeap & heap); + +private: + /** + * @brief Comparison function between timers. + * @return true if `a` triggers after `b`. 
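+ *
+ * (Note: combined with std::make_heap / std::push_heap, a "greater"
+ * comparator yields a min-heap, so the timer with the smallest
+ * time_until_trigger() sits at the front. An illustrative sketch of
+ * the same idea with plain integers:)
+ * @code{.cpp}
+ * std::vector<int> v {5, 1, 4};
+ * std::make_heap(v.begin(), v.end(), std::greater<int>());
+ * // v.front() == 1, analogous to the earliest-triggering timer
+ * @endcode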
+ */ + static bool timer_greater(TimerPtr a, TimerPtr b) + { + // TODO(alsora): this can cause an error if timers are using different clocks + return a->time_until_trigger() > b->time_until_trigger(); + } + + std::vector<TimerPtr> owned_heap_; + }; + + /** + * @brief Implements a loop that keeps executing ready timers. + * This function is executed in the timers thread. + */ + void run_timers(); + + /** + * @brief Get the amount of time before the next timer triggers. + * This function is not thread safe; acquire the timers_mutex_ before calling it. + * + * @return std::optional<std::chrono::nanoseconds> to wait, + * the returned value could be negative if the timer is already expired + * or std::chrono::nanoseconds::max() if the heap is empty. + * If the head timer was cancelled, then this will return a nullopt. + */ + std::optional<std::chrono::nanoseconds> get_head_timeout_unsafe(); + + /** + * @brief Executes all the timers currently ready when the function is invoked, + * while keeping the heap correctly sorted. + * This function is not thread safe; acquire the timers_mutex_ before calling it. + */ + void execute_ready_timers_unsafe(); + + // Callback to be called when a timer is ready + std::function<void (const rclcpp::TimerBase *, const std::shared_ptr<void> &)> on_ready_callback_ = nullptr; + + // Thread used to run the timers execution task + std::thread timers_thread_; + // Protects access to timers + std::mutex timers_mutex_; + // Protects access to stop() + std::mutex stop_mutex_; + // Notifies the timers thread whenever timers are added/removed + std::condition_variable timers_cv_; + // Flag used as predicate by timers_cv_ that denotes one or more timers being added/removed + bool timers_updated_ {false}; + // Indicates whether the timers thread is currently running or not + std::atomic<bool> running_ {false}; + // Parent context used to understand if ROS is still active + std::shared_ptr<rclcpp::Context> context_; + // Timers heap storage with weak ownership + WeakTimersHeap weak_timers_heap_; +}; + +} // namespace experimental +} // namespace rclcpp + +#endif // RCLCPP__EXPERIMENTAL__TIMERS_MANAGER_HPP_ diff --git a/rclcpp/include/rclcpp/guard_condition.hpp b/rclcpp/include/rclcpp/guard_condition.hpp index f6f5af9586..350f306010 100644 --- a/rclcpp/include/rclcpp/guard_condition.hpp +++ b/rclcpp/include/rclcpp/guard_condition.hpp @@ -72,7 +72,7 @@ class GuardCondition const rcl_guard_condition_t & get_rcl_guard_condition() const; - /// Notify the wait set waiting on this condition, if any, that the condition had been met. + /// Signal that the condition has been met, notifying both the wait set and listeners, if any. /** * This function is thread-safe, and may be called concurrently with waiting * on this guard condition in a wait set. @@ -107,6 +107,22 @@ class GuardCondition void add_to_wait_set(rcl_wait_set_t * wait_set); + /// Set a callback to be called whenever the guard condition is triggered. + /** + * The callback receives a size_t which is the number of times the guard condition was triggered + * since the last time this callback was called. + * Normally this is 1, but can be > 1 if the guard condition was triggered before any + * callback was set. + * + * Calling this function again will clear any previously set callback. + * + * This function is thread-safe. + * + * If you want more information available in the callback, like the guard condition + * or other information, you may use a lambda with captures or std::bind.
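+ *
+ * For example, an illustrative sketch using a lambda with captures
+ * (assuming `gc` is a valid rclcpp::GuardCondition):
+ * @code{.cpp}
+ * gc.set_on_trigger_callback(
+ *   [&gc](size_t count) {
+ *     // `count` is the number of triggers since the last invocation
+ *     RCLCPP_INFO(
+ *       rclcpp::get_logger("demo"),
+ *       "guard condition triggered %zu time(s)", count);
+ *   });
+ * @endcode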
+ * + * \param[in] callback functor to be called when the guard condition is triggered + */ RCLCPP_PUBLIC void set_on_trigger_callback(std::function callback); diff --git a/rclcpp/include/rclcpp/strategies/allocator_memory_strategy.hpp b/rclcpp/include/rclcpp/strategies/allocator_memory_strategy.hpp index 46379744a1..41e8ec5d86 100644 --- a/rclcpp/include/rclcpp/strategies/allocator_memory_strategy.hpp +++ b/rclcpp/include/rclcpp/strategies/allocator_memory_strategy.hpp @@ -368,7 +368,8 @@ class AllocatorMemoryStrategy : public memory_strategy::MemoryStrategy ++it; continue; } - if (!timer->call()) { + auto data = timer->call(); + if (!data) { // timer was cancelled, skip it. ++it; continue; @@ -377,6 +378,7 @@ class AllocatorMemoryStrategy : public memory_strategy::MemoryStrategy any_exec.timer = timer; any_exec.callback_group = group; any_exec.node_base = get_node_by_group(group, weak_groups_to_nodes); + any_exec.data = data; timer_handles_.erase(it); return; } diff --git a/rclcpp/include/rclcpp/timer.hpp b/rclcpp/include/rclcpp/timer.hpp index 8af0c6fb68..0ccd18c951 100644 --- a/rclcpp/include/rclcpp/timer.hpp +++ b/rclcpp/include/rclcpp/timer.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,12 @@ namespace rclcpp { +struct TimerInfo +{ + Time expected_call_time; + Time actual_call_time; +}; + class TimerBase { public: @@ -96,16 +103,20 @@ class TimerBase * The multithreaded executor takes advantage of this to avoid scheduling * the callback multiple times. * - * \return `true` if the callback should be executed, `false` if the timer was canceled. + * \return a valid shared_ptr if the callback should be executed, + * an invalid shared_ptr (nullptr) if the timer was canceled. */ RCLCPP_PUBLIC - virtual bool + virtual std::shared_ptr call() = 0; /// Call the callback function when the timer signal is emitted. + /** + * \param[in] data the pointer returned by the call function + */ RCLCPP_PUBLIC virtual void - execute_callback() = 0; + execute_callback(const std::shared_ptr & data) = 0; RCLCPP_PUBLIC std::shared_ptr @@ -193,16 +204,17 @@ class TimerBase set_on_reset_callback(rcl_event_callback_t callback, const void * user_data); }; - using VoidCallbackType = std::function; using TimerCallbackType = std::function; +using TimerInfoCallbackType = std::function; /// Generic timer. Periodically executes a user-specified callback. 
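As an aside, the TimerInfo plumbing above can be exercised end-to-end once WallTimer accepts the new callback signature. A minimal sketch (illustrative only; the node name is hypothetical, error handling is omitted, and it assumes the TimerInfo overload introduced by this diff):

```cpp
#include <chrono>
#include <memory>

#include "rclcpp/rclcpp.hpp"

int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  auto node = std::make_shared<rclcpp::Node>("timer_info_demo");
  // The TimerInfo overload receives both the expected and the actual call
  // time, so the callback can measure how late it actually fired.
  auto timer = node->create_wall_timer(
    std::chrono::milliseconds(100),
    [&node](const rclcpp::TimerInfo & info) {
      const auto jitter = info.actual_call_time - info.expected_call_time;
      RCLCPP_INFO(
        node->get_logger(), "timer fired %lld ns after its expected call time",
        static_cast<long long>(jitter.nanoseconds()));
    });
  rclcpp::spin(node);
  rclcpp::shutdown();
  return 0;
}
```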
template< typename FunctorT, typename std::enable_if< rclcpp::function_traits::same_arguments<FunctorT, VoidCallbackType>::value || - rclcpp::function_traits::same_arguments<FunctorT, TimerCallbackType>::value + rclcpp::function_traits::same_arguments<FunctorT, TimerCallbackType>::value || + rclcpp::function_traits::same_arguments<FunctorT, TimerInfoCallbackType>::value >::type * = nullptr > class GenericTimer : public TimerBase @@ -244,27 +256,28 @@ class GenericTimer : public TimerBase * \sa rclcpp::TimerBase::call * \throws std::runtime_error if it failed to notify the timer that the callback will occur */ - bool + std::shared_ptr<void> call() override { - rcl_ret_t ret = rcl_timer_call(timer_handle_.get()); + auto timer_call_info_ = std::make_shared<rcl_timer_call_info_t>(); + rcl_ret_t ret = rcl_timer_call_with_info(timer_handle_.get(), timer_call_info_.get()); if (ret == RCL_RET_TIMER_CANCELED) { - return false; + return nullptr; } if (ret != RCL_RET_OK) { throw std::runtime_error("Failed to notify timer that callback occurred"); } - return true; + return timer_call_info_; } /** * \sa rclcpp::TimerBase::execute_callback */ void - execute_callback() override + execute_callback(const std::shared_ptr<void> & data) override { TRACEPOINT(callback_start, reinterpret_cast<const void *>(&callback_), false); - execute_callback_delegate<>(); + execute_callback_delegate<>(*static_cast<rcl_timer_call_info_t *>(data.get())); TRACEPOINT(callback_end, reinterpret_cast<const void *>(&callback_)); } @@ -276,7 +289,7 @@ class GenericTimer : public TimerBase >::type * = nullptr > void - execute_callback_delegate() + execute_callback_delegate(const rcl_timer_call_info_t &) { callback_(); } @@ -288,11 +301,26 @@ class GenericTimer : public TimerBase >::type * = nullptr > void - execute_callback_delegate() + execute_callback_delegate(const rcl_timer_call_info_t &) { callback_(*this); } + + template< + typename CallbackT = FunctorT, + typename std::enable_if< + rclcpp::function_traits::same_arguments<CallbackT, TimerInfoCallbackType>::value + >::type * = nullptr + > + void + execute_callback_delegate(const rcl_timer_call_info_t & timer_call_info_) + { + const TimerInfo info{Time{timer_call_info_.expected_call_time, clock_->get_clock_type()}, + Time{timer_call_info_.actual_call_time, clock_->get_clock_type()}}; + callback_(info); + } + /// Is the clock steady (i.e. is the time between ticks constant?) /** \return True if the clock used by this timer is steady. */ bool @@ -311,7 +339,8 @@ template< typename FunctorT, typename std::enable_if< rclcpp::function_traits::same_arguments<FunctorT, VoidCallbackType>::value || - rclcpp::function_traits::same_arguments<FunctorT, TimerCallbackType>::value + rclcpp::function_traits::same_arguments<FunctorT, TimerCallbackType>::value || + rclcpp::function_traits::same_arguments<FunctorT, TimerInfoCallbackType>::value >::type * = nullptr > class WallTimer : public GenericTimer<FunctorT> diff --git a/rclcpp/include/rclcpp/wait_result.hpp b/rclcpp/include/rclcpp/wait_result.hpp index e879043d04..429eb1dd25 100644 --- a/rclcpp/include/rclcpp/wait_result.hpp +++ b/rclcpp/include/rclcpp/wait_result.hpp @@ -17,13 +17,21 @@ #include #include +#include +#include #include +#include #include "rcl/wait.h" #include "rclcpp/macros.hpp" #include "rclcpp/wait_result_kind.hpp" +#include "rclcpp/client.hpp" +#include "rclcpp/service.hpp" +#include "rclcpp/subscription_base.hpp" +#include "rclcpp/timer.hpp" + namespace rclcpp { @@ -134,6 +142,151 @@ class WaitResult final } } + /// Get the next ready timer and its index in the wait result, but do not clear it. + /** + * The returned timer is not cleared automatically, as is the case with the + * other next_ready_*()-like functions.
+ * Instead, this function returns the timer and the index that identifies it + * in the wait result, so that it can be cleared (marked as taken or used) + * in a separate step with clear_timer_with_index(). + * This is necessary in some multi-threaded executor implementations. + * + * If the timer is not cleared using the index, subsequent calls to this + * function will return the same timer. + * + * If there is no ready timer, then nullptr will be returned and the index + * will be invalid and should not be used. + * + * \param[in] start_index index at which to start searching for the next ready + * timer in the wait result. If the start_index is out of bounds for the + * list of timers in the wait result, then {nullptr, start_index} will be + * returned. Defaults to 0. + * \return next ready timer pointer and its index in the wait result, or + * {nullptr, start_index} if none was found. + */ + std::pair, size_t> + peek_next_ready_timer(size_t start_index = 0) + { + check_wait_result_dirty(); + auto ret = std::shared_ptr{nullptr}; + size_t ii = start_index; + if (this->kind() == WaitResultKind::Ready) { + auto & wait_set = this->get_wait_set(); + auto & rcl_wait_set = wait_set.storage_get_rcl_wait_set(); + for (; ii < wait_set.size_of_timers(); ++ii) { + if (rcl_wait_set.timers[ii] != nullptr) { + ret = wait_set.timers(ii); + break; + } + } + } + return {ret, ii}; + } + + /// Clear the timer at the given index. + /** + * Clearing a timer from the wait result prevents it from being returned by + * the peek_next_ready_timer() on subsequent calls. + * + * The index should come from the peek_next_ready_timer() function, and + * should only be used with this function if the timer pointer was valid. + * + * \throws std::out_of_range if the given index is out of range + */ + void + clear_timer_with_index(size_t index) + { + auto & wait_set = this->get_wait_set(); + auto & rcl_wait_set = wait_set.storage_get_rcl_wait_set(); + if (index >= wait_set.size_of_timers()) { + throw std::out_of_range("given timer index is out of range"); + } + rcl_wait_set.timers[index] = nullptr; + } + + /// Get the next ready subscription, clearing it from the wait result. + std::shared_ptr + next_ready_subscription() + { + check_wait_result_dirty(); + auto ret = std::shared_ptr{nullptr}; + if (this->kind() == WaitResultKind::Ready) { + auto & wait_set = this->get_wait_set(); + auto & rcl_wait_set = wait_set.storage_get_rcl_wait_set(); + for (size_t ii = 0; ii < wait_set.size_of_subscriptions(); ++ii) { + if (rcl_wait_set.subscriptions[ii] != nullptr) { + ret = wait_set.subscriptions(ii); + rcl_wait_set.subscriptions[ii] = nullptr; + break; + } + } + } + return ret; + } + + /// Get the next ready service, clearing it from the wait result. + std::shared_ptr + next_ready_service() + { + check_wait_result_dirty(); + auto ret = std::shared_ptr{nullptr}; + if (this->kind() == WaitResultKind::Ready) { + auto & wait_set = this->get_wait_set(); + auto & rcl_wait_set = wait_set.storage_get_rcl_wait_set(); + for (size_t ii = 0; ii < wait_set.size_of_services(); ++ii) { + if (rcl_wait_set.services[ii] != nullptr) { + ret = wait_set.services(ii); + rcl_wait_set.services[ii] = nullptr; + break; + } + } + } + return ret; + } + + /// Get the next ready client, clearing it from the wait result. 
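As an aside, a sketch of the peek/clear protocol described above, roughly as a multi-threaded executor might use it (illustrative only; `wait_result` is assumed to be a ready rclcpp::WaitResult, and any ownership checks between threads are omitted):

```cpp
// Iterate over ready timers without automatically clearing them.
size_t index = 0;
while (true) {
  auto [timer, ii] = wait_result.peek_next_ready_timer(index);
  if (nullptr == timer) {
    break;  // no more ready timers in the wait result
  }
  // call() returns the timer's call info, or nullptr if it was canceled.
  auto data = timer->call();
  if (data) {
    // Mark the timer as taken so it is not returned on subsequent peeks.
    wait_result.clear_timer_with_index(ii);
    timer->execute_callback(data);
  }
  index = ii + 1;  // keep searching after this timer
}
```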
+ std::shared_ptr + next_ready_client() + { + check_wait_result_dirty(); + auto ret = std::shared_ptr{nullptr}; + if (this->kind() == WaitResultKind::Ready) { + auto & wait_set = this->get_wait_set(); + auto & rcl_wait_set = wait_set.storage_get_rcl_wait_set(); + for (size_t ii = 0; ii < wait_set.size_of_clients(); ++ii) { + if (rcl_wait_set.clients[ii] != nullptr) { + ret = wait_set.clients(ii); + rcl_wait_set.clients[ii] = nullptr; + break; + } + } + } + return ret; + } + + /// Get the next ready waitable, clearing it from the wait result. + std::shared_ptr + next_ready_waitable() + { + check_wait_result_dirty(); + auto waitable = std::shared_ptr{nullptr}; + auto data = std::shared_ptr{nullptr}; + + if (this->kind() == WaitResultKind::Ready) { + auto & wait_set = this->get_wait_set(); + auto rcl_wait_set = wait_set.get_rcl_wait_set(); + while (next_waitable_index_ < wait_set.size_of_waitables()) { + auto cur_waitable = wait_set.waitables(next_waitable_index_++); + if (cur_waitable != nullptr && cur_waitable->is_ready(&rcl_wait_set)) { + waitable = cur_waitable; + break; + } + } + } + + return waitable; + } + private: RCLCPP_DISABLE_COPY(WaitResult) @@ -151,12 +304,25 @@ class WaitResult final // Should be enforced by the static factory methods on this class. assert(WaitResultKind::Ready == wait_result_kind); // Secure thread-safety (if provided) and shared ownership (if needed). - wait_set_pointer_->wait_result_acquire(); + this->get_wait_set().wait_result_acquire(); } - const WaitResultKind wait_result_kind_; + /// Check if the wait result is invalid because the wait set was modified. + void + check_wait_result_dirty() + { + // In the case that the wait set was modified while the result was out, + // we must mark the wait result as no longer valid + if (wait_set_pointer_ && this->get_wait_set().wait_result_dirty_) { + this->wait_result_kind_ = WaitResultKind::Invalid; + } + } + + WaitResultKind wait_result_kind_; WaitSetT * wait_set_pointer_ = nullptr; + + size_t next_waitable_index_ = 0; }; } // namespace rclcpp diff --git a/rclcpp/include/rclcpp/wait_result_kind.hpp b/rclcpp/include/rclcpp/wait_result_kind.hpp index 3ce65bf4f3..7980d1d127 100644 --- a/rclcpp/include/rclcpp/wait_result_kind.hpp +++ b/rclcpp/include/rclcpp/wait_result_kind.hpp @@ -26,6 +26,7 @@ enum RCLCPP_PUBLIC WaitResultKind Ready, //get_subscription_handle().get(), + subscription_entry.subscription->get_subscription_handle().get(), nullptr); if (RCL_RET_OK != ret) { - rclcpp::exceptions::throw_from_rcl_error(ret); + rclcpp::exceptions::throw_from_rcl_error(ret, "Couldn't fill wait set"); } } @@ -271,8 +269,7 @@ class StoragePolicyCommon [this](const auto & inner_guard_conditions) { for (const auto & guard_condition : inner_guard_conditions) { - auto guard_condition_ptr_pair = get_raw_pointer_from_smart_pointer(guard_condition); - if (nullptr == guard_condition_ptr_pair.second) { + if (!guard_condition) { // In this case it was probably stored as a weak_ptr, but is now locking to nullptr. if (HasStrongOwnership) { // This will not happen in fixed sized storage, as it holds @@ -285,10 +282,10 @@ class StoragePolicyCommon } rcl_ret_t ret = rcl_wait_set_add_guard_condition( &rcl_wait_set_, - &guard_condition_ptr_pair.second->get_rcl_guard_condition(), + &guard_condition->get_rcl_guard_condition(), nullptr); if (RCL_RET_OK != ret) { - rclcpp::exceptions::throw_from_rcl_error(ret); + rclcpp::exceptions::throw_from_rcl_error(ret, "Couldn't fill wait set"); } } }; @@ -301,8 +298,7 @@ class StoragePolicyCommon // Add timers. 
for (const auto & timer : timers) { - auto timer_ptr_pair = get_raw_pointer_from_smart_pointer(timer); - if (nullptr == timer_ptr_pair.second) { + if (!timer) { // In this case it was probably stored as a weak_ptr, but is now locking to nullptr. if (HasStrongOwnership) { // This will not happen in fixed sized storage, as it holds @@ -315,17 +311,16 @@ class StoragePolicyCommon } rcl_ret_t ret = rcl_wait_set_add_timer( &rcl_wait_set_, - timer_ptr_pair.second->get_timer_handle().get(), + timer->get_timer_handle().get(), nullptr); if (RCL_RET_OK != ret) { - rclcpp::exceptions::throw_from_rcl_error(ret); + rclcpp::exceptions::throw_from_rcl_error(ret, "Couldn't fill wait set"); } } // Add clients. for (const auto & client : clients) { - auto client_ptr_pair = get_raw_pointer_from_smart_pointer(client); - if (nullptr == client_ptr_pair.second) { + if (!client) { // In this case it was probably stored as a weak_ptr, but is now locking to nullptr. if (HasStrongOwnership) { // This will not happen in fixed sized storage, as it holds @@ -338,7 +333,7 @@ class StoragePolicyCommon } rcl_ret_t ret = rcl_wait_set_add_client( &rcl_wait_set_, - client_ptr_pair.second->get_client_handle().get(), + client->get_client_handle().get(), nullptr); if (RCL_RET_OK != ret) { rclcpp::exceptions::throw_from_rcl_error(ret); @@ -347,8 +342,7 @@ class StoragePolicyCommon // Add services. for (const auto & service : services) { - auto service_ptr_pair = get_raw_pointer_from_smart_pointer(service); - if (nullptr == service_ptr_pair.second) { + if (!service) { // In this case it was probably stored as a weak_ptr, but is now locking to nullptr. if (HasStrongOwnership) { // This will not happen in fixed sized storage, as it holds @@ -361,17 +355,16 @@ class StoragePolicyCommon } rcl_ret_t ret = rcl_wait_set_add_service( &rcl_wait_set_, - service_ptr_pair.second->get_service_handle().get(), + service->get_service_handle().get(), nullptr); if (RCL_RET_OK != ret) { - rclcpp::exceptions::throw_from_rcl_error(ret); + rclcpp::exceptions::throw_from_rcl_error(ret, "Couldn't fill wait set"); } } // Add waitables. for (auto & waitable_entry : waitables) { - auto waitable_ptr_pair = get_raw_pointer_from_smart_pointer(waitable_entry.waitable); - if (nullptr == waitable_ptr_pair.second) { + if (!waitable_entry.waitable) { // In this case it was probably stored as a weak_ptr, but is now locking to nullptr. 
if (HasStrongOwnership) { // This will not happen in fixed sized storage, as it holds @@ -382,8 +375,7 @@ class StoragePolicyCommon needs_pruning_ = true; continue; } - rclcpp::Waitable & waitable = *waitable_ptr_pair.second; - waitable.add_to_wait_set(&rcl_wait_set_); + waitable_entry.waitable->add_to_wait_set(&rcl_wait_set_); } } @@ -405,6 +397,32 @@ class StoragePolicyCommon needs_resize_ = true; } + size_t size_of_subscriptions() const {return 0;} + size_t size_of_timers() const {return 0;} + size_t size_of_clients() const {return 0;} + size_t size_of_services() const {return 0;} + size_t size_of_waitables() const {return 0;} + + template + typename SubscriptionsIterable::value_type + subscriptions(size_t) const {return nullptr;} + + template + typename TimersIterable::value_type + timers(size_t) const {return nullptr;} + + template + typename ClientsIterable::value_type + clients(size_t) const {return nullptr;} + + template + typename ServicesIterable::value_type + services(size_t) const {return nullptr;} + + template + typename WaitablesIterable::value_type + waitables(size_t) const {return nullptr;} + rcl_wait_set_t rcl_wait_set_; rclcpp::Context::SharedPtr context_; diff --git a/rclcpp/include/rclcpp/wait_set_policies/dynamic_storage.hpp b/rclcpp/include/rclcpp/wait_set_policies/dynamic_storage.hpp index 4cec85f39a..8f97596218 100644 --- a/rclcpp/include/rclcpp/wait_set_policies/dynamic_storage.hpp +++ b/rclcpp/include/rclcpp/wait_set_policies/dynamic_storage.hpp @@ -204,15 +204,19 @@ class DynamicStorage : public rclcpp::wait_set_policies::detail::StoragePolicyCo void storage_rebuild_rcl_wait_set(const ArrayOfExtraGuardConditions & extra_guard_conditions) { + this->storage_acquire_ownerships(); + this->storage_rebuild_rcl_wait_set_with_sets( - subscriptions_, - guard_conditions_, + shared_subscriptions_, + shared_guard_conditions_, extra_guard_conditions, - timers_, - clients_, - services_, - waitables_ + shared_timers_, + shared_clients_, + shared_services_, + shared_waitables_ ); + + this->storage_release_ownerships(); } template @@ -382,6 +386,8 @@ class DynamicStorage : public rclcpp::wait_set_policies::detail::StoragePolicyCo return weak_ptr.expired(); }; // remove guard conditions which have been deleted + subscriptions_.erase( + std::remove_if(subscriptions_.begin(), subscriptions_.end(), p), subscriptions_.end()); guard_conditions_.erase( std::remove_if(guard_conditions_.begin(), guard_conditions_.end(), p), guard_conditions_.end()); @@ -407,6 +413,7 @@ class DynamicStorage : public rclcpp::wait_set_policies::detail::StoragePolicyCo } }; // Lock all the weak pointers and hold them until released. 
+ lock_all(subscriptions_, shared_subscriptions_); lock_all(guard_conditions_, shared_guard_conditions_); lock_all(timers_, shared_timers_); lock_all(clients_, shared_clients_); @@ -438,6 +445,7 @@ class DynamicStorage : public rclcpp::wait_set_policies::detail::StoragePolicyCo shared_ptr.reset(); } }; + reset_all(shared_subscriptions_); reset_all(shared_guard_conditions_); reset_all(shared_timers_); reset_all(shared_clients_); @@ -445,6 +453,61 @@ class DynamicStorage : public rclcpp::wait_set_policies::detail::StoragePolicyCo reset_all(shared_waitables_); } + size_t size_of_subscriptions() const + { + return shared_subscriptions_.size(); + } + + size_t size_of_timers() const + { + return shared_timers_.size(); + } + + size_t size_of_clients() const + { + return shared_clients_.size(); + } + + size_t size_of_services() const + { + return shared_services_.size(); + } + + size_t size_of_waitables() const + { + return shared_waitables_.size(); + } + + std::shared_ptr + subscriptions(size_t ii) const + { + return shared_subscriptions_[ii].subscription; + } + + std::shared_ptr + timers(size_t ii) const + { + return shared_timers_[ii]; + } + + std::shared_ptr + clients(size_t ii) const + { + return shared_clients_[ii]; + } + + std::shared_ptr + services(size_t ii) const + { + return shared_services_[ii]; + } + + std::shared_ptr + waitables(size_t ii) const + { + return shared_waitables_[ii].waitable; + } + size_t ownership_reference_counter_ = 0; SequenceOfWeakSubscriptions subscriptions_; diff --git a/rclcpp/include/rclcpp/wait_set_policies/sequential_synchronization.hpp b/rclcpp/include/rclcpp/wait_set_policies/sequential_synchronization.hpp index be2e569c40..4afc2a1b27 100644 --- a/rclcpp/include/rclcpp/wait_set_policies/sequential_synchronization.hpp +++ b/rclcpp/include/rclcpp/wait_set_policies/sequential_synchronization.hpp @@ -290,7 +290,7 @@ class SequentialSynchronization : public detail::SynchronizationPolicyCommon return create_wait_result(WaitResultKind::Empty); } else { // Some other error case, throw. - rclcpp::exceptions::throw_from_rcl_error(ret); + rclcpp::exceptions::throw_from_rcl_error(ret, "rcl_wait() failed"); } } while (should_loop()); diff --git a/rclcpp/include/rclcpp/wait_set_policies/static_storage.hpp b/rclcpp/include/rclcpp/wait_set_policies/static_storage.hpp index 434947c19f..7f5cad74ad 100644 --- a/rclcpp/include/rclcpp/wait_set_policies/static_storage.hpp +++ b/rclcpp/include/rclcpp/wait_set_policies/static_storage.hpp @@ -188,6 +188,61 @@ class StaticStorage : public rclcpp::wait_set_policies::detail::StoragePolicyCom // Explicitly do nothing. 
} + size_t size_of_subscriptions() const + { + return subscriptions_.size(); + } + + size_t size_of_timers() const + { + return timers_.size(); + } + + size_t size_of_clients() const + { + return clients_.size(); + } + + size_t size_of_services() const + { + return services_.size(); + } + + size_t size_of_waitables() const + { + return waitables_.size(); + } + + typename ArrayOfSubscriptions::value_type + subscriptions(size_t ii) const + { + return subscriptions_[ii]; + } + + typename ArrayOfTimers::value_type + timers(size_t ii) const + { + return timers_[ii]; + } + + typename ArrayOfClients::value_type + clients(size_t ii) const + { + return clients_[ii]; + } + + typename ArrayOfServices::value_type + services(size_t ii) const + { + return services_[ii]; + } + + typename ArrayOfWaitables::value_type + waitables(size_t ii) const + { + return waitables_[ii]; + } + const ArrayOfSubscriptions subscriptions_; const ArrayOfGuardConditions guard_conditions_; const ArrayOfTimers timers_; diff --git a/rclcpp/include/rclcpp/wait_set_template.hpp b/rclcpp/include/rclcpp/wait_set_template.hpp index 3654801c91..8185c0a6b7 100644 --- a/rclcpp/include/rclcpp/wait_set_template.hpp +++ b/rclcpp/include/rclcpp/wait_set_template.hpp @@ -153,6 +153,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli throw std::runtime_error("subscription already associated with a wait set"); } this->storage_add_subscription(std::move(local_subscription)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} } if (mask.include_events) { for (auto key_event_pair : inner_subscription->get_event_handlers()) { @@ -164,6 +165,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli throw std::runtime_error("subscription event already associated with a wait set"); } this->storage_add_waitable(std::move(event), std::move(local_subscription)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} } } if (mask.include_intra_process_waitable) { @@ -180,6 +182,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli this->storage_add_waitable( std::move(inner_subscription->get_intra_process_waitable()), std::move(local_subscription)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} } } }); @@ -224,6 +227,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli auto local_subscription = inner_subscription; local_subscription->exchange_in_use_by_wait_set_state(local_subscription.get(), false); this->storage_remove_subscription(std::move(local_subscription)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} } if (mask.include_events) { for (auto key_event_pair : inner_subscription->get_event_handlers()) { @@ -231,6 +235,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli auto local_subscription = inner_subscription; local_subscription->exchange_in_use_by_wait_set_state(event.get(), false); this->storage_remove_waitable(std::move(event)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} } } if (mask.include_intra_process_waitable) { @@ -239,6 +244,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // This is the case when intra process is enabled for the subscription. 
inner_subscription->exchange_in_use_by_wait_set_state(local_waitable.get(), false); this->storage_remove_waitable(std::move(local_waitable)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} } } }); @@ -289,6 +295,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the guard condition has already been added. this->storage_add_guard_condition(std::move(inner_guard_condition)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -326,6 +333,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the guard condition is not in the wait set. this->storage_remove_guard_condition(std::move(inner_guard_condition)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -357,6 +365,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the timer has already been added. this->storage_add_timer(std::move(inner_timer)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -384,6 +393,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the timer is not in the wait set. this->storage_remove_timer(std::move(inner_timer)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -415,6 +425,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the client has already been added. this->storage_add_client(std::move(inner_client)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -442,6 +453,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the client is not in the wait set. this->storage_remove_client(std::move(inner_client)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -473,6 +485,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the service has already been added. this->storage_add_service(std::move(inner_service)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -500,6 +513,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the service is not in the wait set. this->storage_remove_service(std::move(inner_service)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -551,6 +565,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the waitable has already been added. this->storage_add_waitable(std::move(inner_waitable), std::move(associated_entity)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -578,6 +593,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli // fixed sized storage policies. // It will throw if the waitable is not in the wait set. 
this->storage_remove_waitable(std::move(inner_waitable)); + if (this->wait_result_holding_) {this->wait_result_dirty_ = true;} }); } @@ -715,6 +731,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli throw std::runtime_error("wait_result_acquire() called while already holding"); } wait_result_holding_ = true; + wait_result_dirty_ = false; // this method comes from the SynchronizationPolicy this->sync_wait_result_acquire(); // this method comes from the StoragePolicy @@ -734,6 +751,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli throw std::runtime_error("wait_result_release() called while not holding"); } wait_result_holding_ = false; + wait_result_dirty_ = false; // this method comes from the StoragePolicy this->storage_release_ownerships(); // this method comes from the SynchronizationPolicy @@ -741,6 +759,7 @@ class WaitSetTemplate final : private SynchronizationPolicy, private StoragePoli } bool wait_result_holding_ = false; + bool wait_result_dirty_ = false; }; } // namespace rclcpp diff --git a/rclcpp/src/rclcpp/any_executable.cpp b/rclcpp/src/rclcpp/any_executable.cpp index 769deacb11..2449cbe1f7 100644 --- a/rclcpp/src/rclcpp/any_executable.cpp +++ b/rclcpp/src/rclcpp/any_executable.cpp @@ -21,6 +21,7 @@ AnyExecutable::AnyExecutable() timer(nullptr), service(nullptr), client(nullptr), + waitable(nullptr), callback_group(nullptr), node_base(nullptr) {} diff --git a/rclcpp/src/rclcpp/callback_group.cpp b/rclcpp/src/rclcpp/callback_group.cpp index 77f6c87bd9..4811c7da51 100644 --- a/rclcpp/src/rclcpp/callback_group.cpp +++ b/rclcpp/src/rclcpp/callback_group.cpp @@ -66,6 +66,7 @@ CallbackGroup::size() const timer_ptrs_.size() + waitable_ptrs_.size(); } + void CallbackGroup::collect_all_ptrs( std::function sub_func, std::function service_func, diff --git a/rclcpp/src/rclcpp/executor.cpp b/rclcpp/src/rclcpp/executor.cpp index d9f87b7be6..262851b040 100644 --- a/rclcpp/src/rclcpp/executor.cpp +++ b/rclcpp/src/rclcpp/executor.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include #include #include #include @@ -22,12 +24,13 @@ #include "rcl/allocator.h" #include "rcl/error_handling.h" +#include "rclcpp/executors/executor_notify_waitable.hpp" +#include "rclcpp/subscription_wait_set_mask.hpp" #include "rcpputils/scope_exit.hpp" #include "rclcpp/exceptions.hpp" #include "rclcpp/executor.hpp" #include "rclcpp/guard_condition.hpp" -#include "rclcpp/memory_strategy.hpp" #include "rclcpp/node.hpp" #include "rclcpp/utilities.hpp" @@ -37,21 +40,29 @@ using namespace std::chrono_literals; -using rclcpp::exceptions::throw_from_rcl_error; using rclcpp::Executor; +/// Mask to indicate to the waitset to only add the subscription. +/// The events and intraprocess waitable are already added via the callback group. 
+static constexpr rclcpp::SubscriptionWaitSetMask kDefaultSubscriptionMask = {true, false, false}; + class rclcpp::ExecutorImplementation {}; Executor::Executor(const rclcpp::ExecutorOptions & options) : spinning(false), interrupt_guard_condition_(std::make_shared(options.context)), shutdown_guard_condition_(std::make_shared(options.context)), - memory_strategy_(options.memory_strategy), + context_(options.context), + notify_waitable_(std::make_shared( + [this]() { + this->entities_need_rebuild_.store(true); + })), + entities_need_rebuild_(true), + collector_(notify_waitable_), + wait_set_({}, {}, {}, {}, {}, {}, options.context), + current_notify_waitable_(notify_waitable_), impl_(std::make_unique()) { - // Store the context for later use. - context_ = options.context; - shutdown_callback_handle_ = context_->add_on_shutdown_callback( [weak_gc = std::weak_ptr{shutdown_guard_condition_}]() { auto strong_gc = weak_gc.lock(); @@ -60,74 +71,47 @@ Executor::Executor(const rclcpp::ExecutorOptions & options) } }); - // The number of guard conditions is always at least 2: 1 for the ctrl-c guard cond, - // and one for the executor's guard cond (interrupt_guard_condition_) - memory_strategy_->add_guard_condition(*shutdown_guard_condition_.get()); - - // Put the executor's guard condition in - memory_strategy_->add_guard_condition(*interrupt_guard_condition_.get()); - rcl_allocator_t allocator = memory_strategy_->get_allocator(); + notify_waitable_->set_on_ready_callback( + [this](auto, auto) { + this->entities_need_rebuild_.store(true); + }); - rcl_ret_t ret = rcl_wait_set_init( - &wait_set_, - 0, 2, 0, 0, 0, 0, - context_->get_rcl_context().get(), - allocator); - if (RCL_RET_OK != ret) { - RCUTILS_LOG_ERROR_NAMED( - "rclcpp", - "failed to create wait set: %s", rcl_get_error_string().str); - rcl_reset_error(); - throw_from_rcl_error(ret, "Failed to create wait set in Executor constructor"); - } + notify_waitable_->add_guard_condition(interrupt_guard_condition_); + notify_waitable_->add_guard_condition(shutdown_guard_condition_); } Executor::~Executor() { - // Disassociate all callback groups. - for (auto & pair : weak_groups_to_nodes_) { - auto group = pair.first.lock(); - if (group) { - std::atomic_bool & has_executor = group->get_associated_with_executor_atomic(); - has_executor.store(false); - } - } - // Disassociate all nodes. 
- std::for_each( - weak_nodes_.begin(), weak_nodes_.end(), [] - (rclcpp::node_interfaces::NodeBaseInterface::WeakPtr weak_node_ptr) { - auto shared_node_ptr = weak_node_ptr.lock(); - if (shared_node_ptr) { - std::atomic_bool & has_executor = shared_node_ptr->get_associated_with_executor_atomic(); - has_executor.store(false); - } + std::lock_guard guard(mutex_); + + notify_waitable_->remove_guard_condition(interrupt_guard_condition_); + notify_waitable_->remove_guard_condition(shutdown_guard_condition_); + + current_collection_.timers.update( + {}, {}, + [this](auto timer) {wait_set_.remove_timer(timer);}); + + current_collection_.subscriptions.update( + {}, {}, + [this](auto subscription) { + wait_set_.remove_subscription(subscription, kDefaultSubscriptionMask); }); - weak_nodes_.clear(); - weak_groups_associated_with_executor_to_nodes_.clear(); - weak_groups_to_nodes_associated_with_executor_.clear(); - weak_groups_to_nodes_.clear(); - for (const auto & pair : weak_groups_to_guard_conditions_) { - auto guard_condition = pair.second; - memory_strategy_->remove_guard_condition(guard_condition); - } - weak_groups_to_guard_conditions_.clear(); - for (const auto & pair : weak_nodes_to_guard_conditions_) { - auto guard_condition = pair.second; - memory_strategy_->remove_guard_condition(guard_condition); - } - weak_nodes_to_guard_conditions_.clear(); + current_collection_.clients.update( + {}, {}, + [this](auto client) {wait_set_.remove_client(client);}); - // Finalize the wait set. - if (rcl_wait_set_fini(&wait_set_) != RCL_RET_OK) { - RCUTILS_LOG_ERROR_NAMED( - "rclcpp", - "failed to destroy wait set: %s", rcl_get_error_string().str); - rcl_reset_error(); - } - // Remove and release the sigint guard condition - memory_strategy_->remove_guard_condition(shutdown_guard_condition_.get()); - memory_strategy_->remove_guard_condition(interrupt_guard_condition_.get()); + current_collection_.services.update( + {}, {}, + [this](auto service) {wait_set_.remove_service(service);}); + + current_collection_.guard_conditions.update( + {}, {}, + [this](auto guard_condition) {wait_set_.remove_guard_condition(guard_condition);}); + + current_collection_.waitables.update( + {}, {}, + [this](auto waitable) {wait_set_.remove_waitable(waitable);}); // Remove shutdown callback handle registered to Context if (!context_->remove_on_shutdown_callback(shutdown_callback_handle_)) { @@ -141,95 +125,39 @@ Executor::~Executor() std::vector Executor::get_all_callback_groups() { - std::vector groups; - std::lock_guard guard{mutex_}; - for (const auto & group_node_ptr : weak_groups_associated_with_executor_to_nodes_) { - groups.push_back(group_node_ptr.first); - } - for (auto const & group_node_ptr : weak_groups_to_nodes_associated_with_executor_) { - groups.push_back(group_node_ptr.first); - } - return groups; + this->collector_.update_collections(); + return this->collector_.get_all_callback_groups(); } std::vector Executor::get_manually_added_callback_groups() { - std::vector groups; - std::lock_guard guard{mutex_}; - for (auto const & group_node_ptr : weak_groups_associated_with_executor_to_nodes_) { - groups.push_back(group_node_ptr.first); - } - return groups; + this->collector_.update_collections(); + return this->collector_.get_manually_added_callback_groups(); } std::vector Executor::get_automatically_added_callback_groups_from_nodes() { - std::vector groups; - std::lock_guard guard{mutex_}; - for (auto const & group_node_ptr : weak_groups_to_nodes_associated_with_executor_) { - groups.push_back(group_node_ptr.first); 
- } - return groups; -} - -void -Executor::add_callback_groups_from_nodes_associated_to_executor() -{ - for (auto & weak_node : weak_nodes_) { - auto node = weak_node.lock(); - if (node) { - node->for_each_callback_group( - [this, node](rclcpp::CallbackGroup::SharedPtr shared_group_ptr) - { - if ( - shared_group_ptr->automatically_add_to_executor_with_node() && - !shared_group_ptr->get_associated_with_executor_atomic().load()) - { - this->add_callback_group_to_map( - shared_group_ptr, - node, - weak_groups_to_nodes_associated_with_executor_, - true); - } - }); - } - } + this->collector_.update_collections(); + return this->collector_.get_automatically_added_callback_groups(); } void -Executor::add_callback_group_to_map( +Executor::add_callback_group( rclcpp::CallbackGroup::SharedPtr group_ptr, rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & weak_groups_to_nodes, bool notify) { - // If the callback_group already has an executor - std::atomic_bool & has_executor = group_ptr->get_associated_with_executor_atomic(); - if (has_executor.exchange(true)) { - throw std::runtime_error("Callback group has already been added to an executor."); - } - - rclcpp::CallbackGroup::WeakPtr weak_group_ptr = group_ptr; - auto insert_info = - weak_groups_to_nodes.insert(std::make_pair(weak_group_ptr, node_ptr)); - bool was_inserted = insert_info.second; - if (!was_inserted) { - throw std::runtime_error("Callback group was already added to executor."); - } - // Also add to the map that contains all callback groups - weak_groups_to_nodes_.insert(std::make_pair(weak_group_ptr, node_ptr)); + (void) node_ptr; + this->collector_.add_callback_group(group_ptr); - if (node_ptr->get_context()->is_valid()) { - auto callback_group_guard_condition = group_ptr->get_notify_guard_condition(); - weak_groups_to_guard_conditions_[weak_group_ptr] = callback_group_guard_condition.get(); - // Add the callback_group's notify condition to the guard condition handles - memory_strategy_->add_guard_condition(*callback_group_guard_condition); + if (!spinning.load()) { + std::lock_guard guard(mutex_); + this->collect_entities(); } if (notify) { - // Interrupt waiting to handle new node try { interrupt_guard_condition_->trigger(); } catch (const rclcpp::exceptions::RCLError & ex) { @@ -240,91 +168,23 @@ Executor::add_callback_group_to_map( } } -void -Executor::add_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - bool notify) -{ - std::lock_guard guard{mutex_}; - this->add_callback_group_to_map( - group_ptr, - node_ptr, - weak_groups_associated_with_executor_to_nodes_, - notify); -} - void Executor::add_node(rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, bool notify) { - // If the node already has an executor - std::atomic_bool & has_executor = node_ptr->get_associated_with_executor_atomic(); - if (has_executor.exchange(true)) { - throw std::runtime_error( - std::string("Node '") + node_ptr->get_fully_qualified_name() + - "' has already been added to an executor."); - } - std::lock_guard guard{mutex_}; - node_ptr->for_each_callback_group( - [this, node_ptr, notify](rclcpp::CallbackGroup::SharedPtr group_ptr) - { - if (!group_ptr->get_associated_with_executor_atomic().load() && - group_ptr->automatically_add_to_executor_with_node()) - { - this->add_callback_group_to_map( - group_ptr, - node_ptr, - weak_groups_to_nodes_associated_with_executor_, - notify); - } 
- }); - - const auto gc = node_ptr->get_shared_notify_guard_condition(); - weak_nodes_to_guard_conditions_[node_ptr] = gc.get(); - // Add the node's notify condition to the guard condition handles - memory_strategy_->add_guard_condition(*gc); - weak_nodes_.push_back(node_ptr); -} + this->collector_.add_node(node_ptr); -void -Executor::remove_callback_group_from_map( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & weak_groups_to_nodes, - bool notify) -{ - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr; - rclcpp::CallbackGroup::WeakPtr weak_group_ptr = group_ptr; - auto iter = weak_groups_to_nodes.find(weak_group_ptr); - if (iter != weak_groups_to_nodes.end()) { - node_ptr = iter->second.lock(); - if (node_ptr == nullptr) { - throw std::runtime_error("Node must not be deleted before its callback group(s)."); - } - weak_groups_to_nodes.erase(iter); - weak_groups_to_nodes_.erase(group_ptr); - std::atomic_bool & has_executor = group_ptr->get_associated_with_executor_atomic(); - has_executor.store(false); - } else { - throw std::runtime_error("Callback group needs to be associated with executor."); + if (!spinning.load()) { + std::lock_guard guard(mutex_); + this->collect_entities(); } - // If the node was matched and removed, interrupt waiting. - if (!has_node(node_ptr, weak_groups_to_nodes_associated_with_executor_) && - !has_node(node_ptr, weak_groups_associated_with_executor_to_nodes_)) - { - auto iter = weak_groups_to_guard_conditions_.find(weak_group_ptr); - if (iter != weak_groups_to_guard_conditions_.end()) { - memory_strategy_->remove_guard_condition(iter->second); - } - weak_groups_to_guard_conditions_.erase(weak_group_ptr); - - if (notify) { - try { - interrupt_guard_condition_->trigger(); - } catch (const rclcpp::exceptions::RCLError & ex) { - throw std::runtime_error( - std::string( - "Failed to trigger guard condition on callback group remove: ") + ex.what()); - } + + if (notify) { + try { + interrupt_guard_condition_->trigger(); + } catch (const rclcpp::exceptions::RCLError & ex) { + throw std::runtime_error( + std::string( + "Failed to trigger guard condition on node add: ") + ex.what()); } } } @@ -334,11 +194,21 @@ Executor::remove_callback_group( rclcpp::CallbackGroup::SharedPtr group_ptr, bool notify) { - std::lock_guard guard{mutex_}; - this->remove_callback_group_from_map( - group_ptr, - weak_groups_associated_with_executor_to_nodes_, - notify); + this->collector_.remove_callback_group(group_ptr); + + if (!spinning.load()) { + std::lock_guard guard(mutex_); + this->collect_entities(); + } + if (notify) { + try { + interrupt_guard_condition_->trigger(); + } catch (const rclcpp::exceptions::RCLError & ex) { + throw std::runtime_error( + std::string( + "Failed to trigger guard condition on callback group remove: ") + ex.what()); + } + } } void @@ -350,48 +220,22 @@ Executor::add_node(std::shared_ptr node_ptr, bool notify) void Executor::remove_node(rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, bool notify) { - if (!node_ptr->get_associated_with_executor_atomic().load()) { - throw std::runtime_error("Node needs to be associated with an executor."); - } - - std::lock_guard guard{mutex_}; - bool found_node = false; - auto node_it = weak_nodes_.begin(); - while (node_it != weak_nodes_.end()) { - bool matched = (node_it->lock() == node_ptr); - if (matched) { - found_node = true; - node_it = weak_nodes_.erase(node_it); - } else { - ++node_it; - } - } - if (!found_node) { - throw 
std::runtime_error("Node needs to be associated with this executor."); + this->collector_.remove_node(node_ptr); + + if (!spinning.load()) { + std::lock_guard guard(mutex_); + this->collect_entities(); } - for (auto it = weak_groups_to_nodes_associated_with_executor_.begin(); - it != weak_groups_to_nodes_associated_with_executor_.end(); ) - { - auto weak_node_ptr = it->second; - auto shared_node_ptr = weak_node_ptr.lock(); - auto group_ptr = it->first.lock(); - - // Increment iterator before removing in case it's invalidated - it++; - if (shared_node_ptr == node_ptr) { - remove_callback_group_from_map( - group_ptr, - weak_groups_to_nodes_associated_with_executor_, - notify); + if (notify) { + try { + interrupt_guard_condition_->trigger(); + } catch (const rclcpp::exceptions::RCLError & ex) { + throw std::runtime_error( + std::string( + "Failed to trigger guard condition on node remove: ") + ex.what()); } } - - memory_strategy_->remove_guard_condition(node_ptr->get_shared_notify_guard_condition().get()); - weak_nodes_to_guard_conditions_.erase(node_ptr); - - std::atomic_bool & has_executor = node_ptr->get_associated_with_executor_atomic(); - has_executor.store(false); } void @@ -458,20 +302,25 @@ Executor::spin_some_impl(std::chrono::nanoseconds max_duration, bool exhaustive) throw std::runtime_error("spin_some() called while already spinning"); } RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); - bool work_available = false; + while (rclcpp::ok(context_) && spinning.load() && max_duration_not_elapsed()) { - AnyExecutable any_exec; - if (!work_available) { - wait_for_work(std::chrono::milliseconds::zero()); + if (!wait_result_.has_value()) { + wait_for_work(std::chrono::milliseconds(0)); } + + AnyExecutable any_exec; if (get_next_ready_executable(any_exec)) { execute_any_executable(any_exec); - work_available = true; } else { - if (!work_available || !exhaustive) { - break; - } - work_available = false; + // If nothing is ready, reset the result to signal we are + // ready to wait again + wait_result_.reset(); + } + + if (!wait_result_.has_value() && !exhaustive) { + // In the case of spin some, then we can exit + // In the case of spin all, then we will allow ourselves to wait again. + break; } } } @@ -507,27 +356,18 @@ Executor::cancel() } } -void -Executor::set_memory_strategy(rclcpp::memory_strategy::MemoryStrategy::SharedPtr memory_strategy) -{ - if (memory_strategy == nullptr) { - throw std::runtime_error("Received NULL memory strategy in executor."); - } - std::lock_guard guard{mutex_}; - memory_strategy_ = memory_strategy; -} - void Executor::execute_any_executable(AnyExecutable & any_exec) { if (!spinning.load()) { return; } + if (any_exec.timer) { TRACEPOINT( rclcpp_executor_execute, static_cast(any_exec.timer->get_timer_handle().get())); - execute_timer(any_exec.timer); + execute_timer(any_exec.timer, any_exec.data); } if (any_exec.subscription) { TRACEPOINT( @@ -544,16 +384,10 @@ Executor::execute_any_executable(AnyExecutable & any_exec) if (any_exec.waitable) { any_exec.waitable->execute(any_exec.data); } + // Reset the callback_group, regardless of type - any_exec.callback_group->can_be_taken_from().store(true); - // Wake the wait, because it may need to be recalculated or work that - // was previously blocked is now available. 
- try { - interrupt_guard_condition_->trigger(); - } catch (const rclcpp::exceptions::RCLError & ex) { - throw std::runtime_error( - std::string( - "Failed to trigger guard condition from execute_any_executable: ") + ex.what()); + if (any_exec.callback_group) { + any_exec.callback_group->can_be_taken_from().store(true); } } @@ -664,9 +498,9 @@ Executor::execute_subscription(rclcpp::SubscriptionBase::SharedPtr subscription) } void -Executor::execute_timer(rclcpp::TimerBase::SharedPtr timer) +Executor::execute_timer(rclcpp::TimerBase::SharedPtr timer, const std::shared_ptr & dataPtr) { - timer->execute_callback(); + timer->execute_callback(dataPtr); } void @@ -683,8 +517,7 @@ Executor::execute_service(rclcpp::ServiceBase::SharedPtr service) } void -Executor::execute_client( - rclcpp::ClientBase::SharedPtr client) +Executor::execute_client(rclcpp::ClientBase::SharedPtr client) { auto request_header = client->create_request_header(); std::shared_ptr response = client->create_response(); @@ -696,227 +529,217 @@ Executor::execute_client( } void -Executor::wait_for_work(std::chrono::nanoseconds timeout) -{ - TRACEPOINT(rclcpp_executor_wait_for_work, timeout.count()); - { - std::lock_guard guard(mutex_); - - // Check weak_nodes_ to find any callback group that is not owned - // by an executor and add it to the list of callbackgroups for - // collect entities. Also exchange to false so it is not - // allowed to add to another executor - add_callback_groups_from_nodes_associated_to_executor(); - - // Collect the subscriptions and timers to be waited on - memory_strategy_->clear_handles(); - bool has_invalid_weak_groups_or_nodes = - memory_strategy_->collect_entities(weak_groups_to_nodes_); - - if (has_invalid_weak_groups_or_nodes) { - std::vector invalid_group_ptrs; - for (auto pair : weak_groups_to_nodes_) { - auto weak_group_ptr = pair.first; - auto weak_node_ptr = pair.second; - if (weak_group_ptr.expired() || weak_node_ptr.expired()) { - invalid_group_ptrs.push_back(weak_group_ptr); - auto node_guard_pair = weak_nodes_to_guard_conditions_.find(weak_node_ptr); - if (node_guard_pair != weak_nodes_to_guard_conditions_.end()) { - auto guard_condition = node_guard_pair->second; - weak_nodes_to_guard_conditions_.erase(weak_node_ptr); - memory_strategy_->remove_guard_condition(guard_condition); - } - } - } - std::for_each( - invalid_group_ptrs.begin(), invalid_group_ptrs.end(), - [this](rclcpp::CallbackGroup::WeakPtr group_ptr) { - if (weak_groups_to_nodes_associated_with_executor_.find(group_ptr) != - weak_groups_to_nodes_associated_with_executor_.end()) - { - weak_groups_to_nodes_associated_with_executor_.erase(group_ptr); - } - if (weak_groups_associated_with_executor_to_nodes_.find(group_ptr) != - weak_groups_associated_with_executor_to_nodes_.end()) - { - weak_groups_associated_with_executor_to_nodes_.erase(group_ptr); - } - auto callback_guard_pair = weak_groups_to_guard_conditions_.find(group_ptr); - if (callback_guard_pair != weak_groups_to_guard_conditions_.end()) { - auto guard_condition = callback_guard_pair->second; - weak_groups_to_guard_conditions_.erase(group_ptr); - memory_strategy_->remove_guard_condition(guard_condition); - } - weak_groups_to_nodes_.erase(group_ptr); - }); - } +Executor::collect_entities() +{ + // Updating the entity collection and waitset expires any active result + this->wait_result_.reset(); + + // Get the current list of available waitables from the collector. 
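+  // update_collections() processes any pending node and callback group
+  // additions/removals queued in the collector, so the snapshot taken
+  // below reflects the latest requested changes.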
+ rclcpp::executors::ExecutorEntitiesCollection collection; + this->collector_.update_collections(); + auto callback_groups = this->collector_.get_all_callback_groups(); + rclcpp::executors::build_entities_collection(callback_groups, collection); + + // Make a copy of notify waitable so we can continue to mutate the original + // one outside of the execute loop. + // This prevents the collection of guard conditions in the waitable from changing + // while we are waiting on it. + if (notify_waitable_) { + current_notify_waitable_ = std::make_shared( + *notify_waitable_); + auto notify_waitable = std::static_pointer_cast(current_notify_waitable_); + collection.waitables.insert({notify_waitable.get(), {notify_waitable, {}}}); + } + + // We must remove expired entities here, so that we don't continue to use older entities. + // See https://github.com/ros2/rclcpp/issues/2180 for more information. + current_collection_.remove_expired_entities(); + + // Update each of the groups of entities in the current collection, adding or removing + // from the wait set as necessary. + current_collection_.timers.update( + collection.timers, + [this](auto timer) {wait_set_.add_timer(timer);}, + [this](auto timer) {wait_set_.remove_timer(timer);}); + + current_collection_.subscriptions.update( + collection.subscriptions, + [this](auto subscription) { + wait_set_.add_subscription(subscription, kDefaultSubscriptionMask); + }, + [this](auto subscription) { + wait_set_.remove_subscription(subscription, kDefaultSubscriptionMask); + }); - // clear wait set - rcl_ret_t ret = rcl_wait_set_clear(&wait_set_); - if (ret != RCL_RET_OK) { - throw_from_rcl_error(ret, "Couldn't clear wait set"); - } + current_collection_.clients.update( + collection.clients, + [this](auto client) {wait_set_.add_client(client);}, + [this](auto client) {wait_set_.remove_client(client);}); - // The size of waitables are accounted for in size of the other entities - ret = rcl_wait_set_resize( - &wait_set_, memory_strategy_->number_of_ready_subscriptions(), - memory_strategy_->number_of_guard_conditions(), memory_strategy_->number_of_ready_timers(), - memory_strategy_->number_of_ready_clients(), memory_strategy_->number_of_ready_services(), - memory_strategy_->number_of_ready_events()); - if (RCL_RET_OK != ret) { - throw_from_rcl_error(ret, "Couldn't resize the wait set"); - } + current_collection_.services.update( + collection.services, + [this](auto service) {wait_set_.add_service(service);}, + [this](auto service) {wait_set_.remove_service(service);}); - if (!memory_strategy_->add_handles_to_wait_set(&wait_set_)) { - throw std::runtime_error("Couldn't fill wait set"); - } - } + current_collection_.guard_conditions.update( + collection.guard_conditions, + [this](auto guard_condition) {wait_set_.add_guard_condition(guard_condition);}, + [this](auto guard_condition) {wait_set_.remove_guard_condition(guard_condition);}); - rcl_ret_t status = - rcl_wait(&wait_set_, std::chrono::duration_cast(timeout).count()); - if (status == RCL_RET_WAIT_SET_EMPTY) { - RCUTILS_LOG_WARN_NAMED( - "rclcpp", - "empty wait set received in rcl_wait(). 
This should never happen."); - } else if (status != RCL_RET_OK && status != RCL_RET_TIMEOUT) { - using rclcpp::exceptions::throw_from_rcl_error; - throw_from_rcl_error(status, "rcl_wait() failed"); - } + current_collection_.waitables.update( + collection.waitables, + [this](auto waitable) {wait_set_.add_waitable(waitable);}, + [this](auto waitable) {wait_set_.remove_waitable(waitable);}); - // check the null handles in the wait set and remove them from the handles in memory strategy - // for callback-based entities - std::lock_guard guard(mutex_); - memory_strategy_->remove_null_handles(&wait_set_); + // In the case that an entity already has an expired weak pointer + // before being removed from the waitset, additionally prune the waitset. + this->wait_set_.prune_deleted_entities(); + this->entities_need_rebuild_.store(false); } -rclcpp::node_interfaces::NodeBaseInterface::SharedPtr -Executor::get_node_by_group( - const rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & - weak_groups_to_nodes, - rclcpp::CallbackGroup::SharedPtr group) +void +Executor::wait_for_work(std::chrono::nanoseconds timeout) { - if (!group) { - return nullptr; - } - rclcpp::CallbackGroup::WeakPtr weak_group_ptr(group); - const auto finder = weak_groups_to_nodes.find(weak_group_ptr); - if (finder != weak_groups_to_nodes.end()) { - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr = finder->second.lock(); - return node_ptr; - } - return nullptr; -} + TRACEPOINT(rclcpp_executor_wait_for_work, timeout.count()); -rclcpp::CallbackGroup::SharedPtr -Executor::get_group_by_timer(rclcpp::TimerBase::SharedPtr timer) -{ - std::lock_guard guard{mutex_}; - for (const auto & pair : weak_groups_associated_with_executor_to_nodes_) { - auto group = pair.first.lock(); - if (!group) { - continue; - } - auto timer_ref = group->find_timer_ptrs_if( - [timer](const rclcpp::TimerBase::SharedPtr & timer_ptr) -> bool { - return timer_ptr == timer; - }); - if (timer_ref) { - return group; - } - } + // Clear any previous wait result + this->wait_result_.reset(); - for (const auto & pair : weak_groups_to_nodes_associated_with_executor_) { - auto group = pair.first.lock(); - if (!group) { - continue; - } - auto timer_ref = group->find_timer_ptrs_if( - [timer](const rclcpp::TimerBase::SharedPtr & timer_ptr) -> bool { - return timer_ptr == timer; - }); - if (timer_ref) { - return group; + { + std::lock_guard guard(mutex_); + if (current_collection_.empty() || this->entities_need_rebuild_.load()) { + this->collect_entities(); } } - return nullptr; + this->wait_result_.emplace(wait_set_.wait(timeout)); + if (!this->wait_result_ || this->wait_result_->kind() == WaitResultKind::Empty) { + RCUTILS_LOG_WARN_NAMED( + "rclcpp", + "empty wait set received in wait(). 
This should never happen.");
+  }
 }
 
 bool
 Executor::get_next_ready_executable(AnyExecutable & any_executable)
-{
-  bool success = get_next_ready_executable_from_map(any_executable, weak_groups_to_nodes_);
-  return success;
-}
-
-bool
-Executor::get_next_ready_executable_from_map(
-  AnyExecutable & any_executable,
-  const rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap &
-  weak_groups_to_nodes)
 {
   TRACEPOINT(rclcpp_executor_get_next_ready);
-  bool success = false;
-  std::lock_guard guard{mutex_};
-  // Check the timers to see if there are any that are ready
-  memory_strategy_->get_next_timer(any_executable, weak_groups_to_nodes);
-  if (any_executable.timer) {
-    success = true;
+
+  bool valid_executable = false;
+
+  if (!wait_result_.has_value() || wait_result_->kind() != rclcpp::WaitResultKind::Ready) {
+    return false;
   }
-  if (!success) {
-    // Check the subscriptions to see if there are any that are ready
-    memory_strategy_->get_next_subscription(any_executable, weak_groups_to_nodes);
-    if (any_executable.subscription) {
-      success = true;
+
+  if (!valid_executable) {
+    size_t current_timer_index = 0;
+    while (true) {
+      auto [timer, timer_index] = wait_result_->peek_next_ready_timer(current_timer_index);
+      if (nullptr == timer) {
+        break;
+      }
+      current_timer_index = timer_index;
+      auto entity_iter = current_collection_.timers.find(timer->get_timer_handle().get());
+      if (entity_iter != current_collection_.timers.end()) {
+        auto callback_group = entity_iter->second.callback_group.lock();
+        if (callback_group && !callback_group->can_be_taken_from()) {
+          current_timer_index++;
+          continue;
+        }
+        // At this point the timer is either ready for execution or was perhaps
+        // canceled, based on the result of call(), but either way it should
+        // not be checked again from peek_next_ready_timer(), so clear it from
+        // the wait result.
+        wait_result_->clear_timer_with_index(current_timer_index);
+        // Check that the timer should still be called, i.e. it wasn't canceled.
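+        // call() returns nullptr if the timer was canceled in the meantime;
+        // otherwise the returned data is kept on the AnyExecutable and later
+        // passed to execute_timer().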
+ any_executable.data = timer->call(); + if (!any_executable.data) { + current_timer_index++; + continue; + } + any_executable.timer = timer; + any_executable.callback_group = callback_group; + valid_executable = true; + break; + } + current_timer_index++; } } - if (!success) { - // Check the services to see if there are any that are ready - memory_strategy_->get_next_service(any_executable, weak_groups_to_nodes); - if (any_executable.service) { - success = true; + + if (!valid_executable) { + while (auto subscription = wait_result_->next_ready_subscription()) { + auto entity_iter = current_collection_.subscriptions.find( + subscription->get_subscription_handle().get()); + if (entity_iter != current_collection_.subscriptions.end()) { + auto callback_group = entity_iter->second.callback_group.lock(); + if (callback_group && !callback_group->can_be_taken_from()) { + continue; + } + any_executable.subscription = subscription; + any_executable.callback_group = callback_group; + valid_executable = true; + break; + } } } - if (!success) { - // Check the clients to see if there are any that are ready - memory_strategy_->get_next_client(any_executable, weak_groups_to_nodes); - if (any_executable.client) { - success = true; + + if (!valid_executable) { + while (auto service = wait_result_->next_ready_service()) { + auto entity_iter = current_collection_.services.find(service->get_service_handle().get()); + if (entity_iter != current_collection_.services.end()) { + auto callback_group = entity_iter->second.callback_group.lock(); + if (callback_group && !callback_group->can_be_taken_from()) { + continue; + } + any_executable.service = service; + any_executable.callback_group = callback_group; + valid_executable = true; + break; + } } } - if (!success) { - // Check the waitables to see if there are any that are ready - memory_strategy_->get_next_waitable(any_executable, weak_groups_to_nodes); - if (any_executable.waitable) { - any_executable.data = any_executable.waitable->take_data(); - success = true; + + if (!valid_executable) { + while (auto client = wait_result_->next_ready_client()) { + auto entity_iter = current_collection_.clients.find(client->get_client_handle().get()); + if (entity_iter != current_collection_.clients.end()) { + auto callback_group = entity_iter->second.callback_group.lock(); + if (callback_group && !callback_group->can_be_taken_from()) { + continue; + } + any_executable.client = client; + any_executable.callback_group = callback_group; + valid_executable = true; + break; + } } } - // At this point any_executable should be valid with either a valid subscription - // or a valid timer, or it should be a null shared_ptr - if (success) { - rclcpp::CallbackGroup::WeakPtr weak_group_ptr = any_executable.callback_group; - auto iter = weak_groups_to_nodes.find(weak_group_ptr); - if (iter == weak_groups_to_nodes.end()) { - success = false; + + if (!valid_executable) { + while (auto waitable = wait_result_->next_ready_waitable()) { + auto entity_iter = current_collection_.waitables.find(waitable.get()); + if (entity_iter != current_collection_.waitables.end()) { + auto callback_group = entity_iter->second.callback_group.lock(); + if (callback_group && !callback_group->can_be_taken_from()) { + continue; + } + any_executable.waitable = waitable; + any_executable.callback_group = callback_group; + any_executable.data = waitable->take_data(); + valid_executable = true; + break; + } } } - if (success) { - // If it is valid, check to see if the group is mutually exclusive or - // not, then 
mark it accordingly ..Check if the callback_group belongs to this executor - if (any_executable.callback_group && any_executable.callback_group->type() == \ - CallbackGroupType::MutuallyExclusive) - { - // It should not have been taken otherwise + if (any_executable.callback_group) { + if (any_executable.callback_group->type() == CallbackGroupType::MutuallyExclusive) { assert(any_executable.callback_group->can_be_taken_from().load()); - // Set to false to indicate something is being run from this group - // This is reset to true either when the any_executable is executed or when the - // any_executable is destructued any_executable.callback_group->can_be_taken_from().store(false); } } - // If there is no ready executable, return false - return success; + + + return valid_executable; } bool @@ -939,22 +762,6 @@ Executor::get_next_executable(AnyExecutable & any_executable, std::chrono::nanos return success; } -// Returns true iff the weak_groups_to_nodes map has node_ptr as the value in any of its entry. -bool -Executor::has_node( - const rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - const rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & - weak_groups_to_nodes) const -{ - return std::find_if( - weak_groups_to_nodes.begin(), - weak_groups_to_nodes.end(), - [&](const WeakCallbackGroupsToNodesMap::value_type & other) -> bool { - auto other_ptr = other.second.lock(); - return other_ptr == node_ptr; - }) != weak_groups_to_nodes.end(); -} - bool Executor::is_spinning() { diff --git a/rclcpp/src/rclcpp/executors/executor_entities_collection.cpp b/rclcpp/src/rclcpp/executors/executor_entities_collection.cpp index 88a878824a..b6e030d340 100644 --- a/rclcpp/src/rclcpp/executors/executor_entities_collection.cpp +++ b/rclcpp/src/rclcpp/executors/executor_entities_collection.cpp @@ -39,6 +39,30 @@ void ExecutorEntitiesCollection::clear() waitables.clear(); } +size_t ExecutorEntitiesCollection::remove_expired_entities() +{ + auto remove_entities = [](auto & collection) -> size_t { + size_t removed = 0; + for (auto it = collection.begin(); it != collection.end(); ) { + if (it->second.entity.expired()) { + ++removed; + it = collection.erase(it); + } else { + ++it; + } + } + return removed; + }; + + return + remove_entities(subscriptions) + + remove_entities(timers) + + remove_entities(guard_conditions) + + remove_entities(clients) + + remove_entities(services) + + remove_entities(waitables); +} + void build_entities_collection( const std::vector & callback_groups, @@ -203,7 +227,7 @@ ready_executables( } } - for (auto & [handle, entry] : collection.waitables) { + for (const auto & [handle, entry] : collection.waitables) { auto waitable = entry.entity.lock(); if (!waitable) { continue; @@ -218,13 +242,10 @@ ready_executables( rclcpp::AnyExecutable exec; exec.waitable = waitable; exec.callback_group = group_info; - exec.data = waitable->take_data(); executables.push_back(exec); added++; } - return added; } - } // namespace executors } // namespace rclcpp diff --git a/rclcpp/src/rclcpp/executors/executor_entities_collector.cpp b/rclcpp/src/rclcpp/executors/executor_entities_collector.cpp index 84ada64925..702716a758 100644 --- a/rclcpp/src/rclcpp/executors/executor_entities_collector.cpp +++ b/rclcpp/src/rclcpp/executors/executor_entities_collector.cpp @@ -61,6 +61,12 @@ ExecutorEntitiesCollector::~ExecutorEntitiesCollector() if (group_ptr) { group_ptr->get_associated_with_executor_atomic().store(false); } + // Disassociate the guard condition from the executor notify 
waitable
+      auto guard_condition_it = weak_groups_to_guard_conditions_.find(weak_group_ptr);
+      if (guard_condition_it != weak_groups_to_guard_conditions_.end()) {
+        this->notify_waitable_->remove_guard_condition(guard_condition_it->second);
+        weak_groups_to_guard_conditions_.erase(guard_condition_it);
+      }
   }
   pending_manually_added_groups_.clear();
   pending_manually_removed_groups_.clear();
@@ -105,7 +111,8 @@ void
 ExecutorEntitiesCollector::remove_node(
   rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr)
 {
-  if (!node_ptr->get_associated_with_executor_atomic().load()) {
+  std::atomic_bool & has_executor = node_ptr->get_associated_with_executor_atomic();
+  if (!has_executor.exchange(false)) {
     throw std::runtime_error(
       std::string("Node '") + node_ptr->get_fully_qualified_name() +
       "' needs to be associated with an executor.");
@@ -143,6 +150,11 @@ ExecutorEntitiesCollector::add_callback_group(rclcpp::CallbackGroup::SharedPtr g
   }
   this->pending_manually_added_groups_.insert(group_ptr);
+
+  // Store the callback group's notify guard condition in the map and add it to the
+  // notify waitable
+  auto group_guard_condition = group_ptr->get_notify_guard_condition();
+  weak_groups_to_guard_conditions_.insert({group_ptr, group_guard_condition});
+  this->notify_waitable_->add_guard_condition(group_guard_condition);
 }
 
 void
@@ -161,7 +173,6 @@ ExecutorEntitiesCollector::remove_callback_group(rclcpp::CallbackGroup::SharedPt
     throw std::runtime_error("Node must not be deleted before its callback group(s).");
   }
   */
-  auto weak_group_ptr = rclcpp::CallbackGroup::WeakPtr(group_ptr);
   std::lock_guard lock(mutex_);
 
   bool associated = manually_added_groups_.count(group_ptr) != 0;
@@ -314,7 +325,11 @@ ExecutorEntitiesCollector::process_queues()
     if (node_it != weak_nodes_.end()) {
       remove_weak_node(node_it);
     } else {
-      throw std::runtime_error("Node needs to be associated with this executor.");
+      // The node may have been destroyed and removed from the collection before
+      // we processed the queues. Don't throw if the pointer is already expired.
+ if (!weak_node_ptr.expired()) { + throw std::runtime_error("Node needs to be associated with this executor."); + } } auto node_ptr = weak_node_ptr.lock(); @@ -337,6 +352,13 @@ ExecutorEntitiesCollector::process_queues() auto group_ptr = weak_group_ptr.lock(); if (group_ptr) { this->add_callback_group_to_collection(group_ptr, manually_added_groups_); + } else { + // Disassociate the guard condition from the executor notify waitable + auto guard_condition_it = weak_groups_to_guard_conditions_.find(weak_group_ptr); + if (guard_condition_it != weak_groups_to_guard_conditions_.end()) { + this->notify_waitable_->remove_guard_condition(guard_condition_it->second); + weak_groups_to_guard_conditions_.erase(guard_condition_it); + } } } pending_manually_added_groups_.clear(); diff --git a/rclcpp/src/rclcpp/executors/executor_notify_waitable.cpp b/rclcpp/src/rclcpp/executors/executor_notify_waitable.cpp index c0ad8a25a4..85bedcead1 100644 --- a/rclcpp/src/rclcpp/executors/executor_notify_waitable.cpp +++ b/rclcpp/src/rclcpp/executors/executor_notify_waitable.cpp @@ -27,15 +27,17 @@ ExecutorNotifyWaitable::ExecutorNotifyWaitable(std::function on_exec { } -ExecutorNotifyWaitable::ExecutorNotifyWaitable(const ExecutorNotifyWaitable & other) -: ExecutorNotifyWaitable(other.execute_callback_) +ExecutorNotifyWaitable::ExecutorNotifyWaitable(ExecutorNotifyWaitable & other) { + std::lock_guard lock(other.guard_condition_mutex_); + this->execute_callback_ = other.execute_callback_; this->notify_guard_conditions_ = other.notify_guard_conditions_; } -ExecutorNotifyWaitable & ExecutorNotifyWaitable::operator=(const ExecutorNotifyWaitable & other) +ExecutorNotifyWaitable & ExecutorNotifyWaitable::operator=(ExecutorNotifyWaitable & other) { if (this != &other) { + std::lock_guard lock(other.guard_condition_mutex_); this->execute_callback_ = other.execute_callback_; this->notify_guard_conditions_ = other.notify_guard_conditions_; } @@ -46,20 +48,17 @@ void ExecutorNotifyWaitable::add_to_wait_set(rcl_wait_set_t * wait_set) { std::lock_guard lock(guard_condition_mutex_); - for (auto weak_guard_condition : this->notify_guard_conditions_) { auto guard_condition = weak_guard_condition.lock(); - if (guard_condition) { - auto rcl_guard_condition = &guard_condition->get_rcl_guard_condition(); + if (!guard_condition) {continue;} - rcl_ret_t ret = rcl_wait_set_add_guard_condition( - wait_set, - rcl_guard_condition, NULL); + rcl_guard_condition_t * cond = &guard_condition->get_rcl_guard_condition(); - if (RCL_RET_OK != ret) { - rclcpp::exceptions::throw_from_rcl_error( - ret, "failed to add guard condition to wait set"); - } + rcl_ret_t ret = rcl_wait_set_add_guard_condition(wait_set, cond, NULL); + + if (RCL_RET_OK != ret) { + rclcpp::exceptions::throw_from_rcl_error( + ret, "failed to add guard condition to wait set"); } } } @@ -71,15 +70,16 @@ ExecutorNotifyWaitable::is_ready(rcl_wait_set_t * wait_set) bool any_ready = false; for (size_t ii = 0; ii < wait_set->size_of_guard_conditions; ++ii) { - auto rcl_guard_condition = wait_set->guard_conditions[ii]; + const auto * rcl_guard_condition = wait_set->guard_conditions[ii]; if (nullptr == rcl_guard_condition) { continue; } - for (auto weak_guard_condition : this->notify_guard_conditions_) { + for (const auto & weak_guard_condition : this->notify_guard_conditions_) { auto guard_condition = weak_guard_condition.lock(); if (guard_condition && &guard_condition->get_rcl_guard_condition() == rcl_guard_condition) { any_ready = true; + break; } } } @@ -99,6 +99,52 @@ 
ExecutorNotifyWaitable::take_data() return nullptr; } +std::shared_ptr +ExecutorNotifyWaitable::take_data_by_entity_id(size_t id) +{ + (void) id; + return nullptr; +} + +void +ExecutorNotifyWaitable::set_on_ready_callback(std::function callback) +{ + // The second argument of the callback could be used to identify which guard condition + // triggered the event. + // We could indicate which of the guard conditions was triggered, but the executor + // is already going to check that. + auto gc_callback = [callback](size_t count) { + callback(count, 0); + }; + + std::lock_guard lock(guard_condition_mutex_); + + on_ready_callback_ = gc_callback; + for (auto weak_gc : notify_guard_conditions_) { + auto gc = weak_gc.lock(); + if (!gc) { + continue; + } + gc->set_on_trigger_callback(on_ready_callback_); + } +} + +RCLCPP_PUBLIC +void +ExecutorNotifyWaitable::clear_on_ready_callback() +{ + std::lock_guard lock(guard_condition_mutex_); + + on_ready_callback_ = nullptr; + for (auto weak_gc : notify_guard_conditions_) { + auto gc = weak_gc.lock(); + if (!gc) { + continue; + } + gc->set_on_trigger_callback(nullptr); + } +} + void ExecutorNotifyWaitable::add_guard_condition(rclcpp::GuardCondition::WeakPtr weak_guard_condition) { @@ -106,15 +152,23 @@ ExecutorNotifyWaitable::add_guard_condition(rclcpp::GuardCondition::WeakPtr weak auto guard_condition = weak_guard_condition.lock(); if (guard_condition && notify_guard_conditions_.count(weak_guard_condition) == 0) { notify_guard_conditions_.insert(weak_guard_condition); + if (on_ready_callback_) { + guard_condition->set_on_trigger_callback(on_ready_callback_); + } } } void -ExecutorNotifyWaitable::remove_guard_condition(rclcpp::GuardCondition::WeakPtr guard_condition) +ExecutorNotifyWaitable::remove_guard_condition(rclcpp::GuardCondition::WeakPtr weak_guard_condition) { std::lock_guard lock(guard_condition_mutex_); - if (notify_guard_conditions_.count(guard_condition) != 0) { - notify_guard_conditions_.erase(guard_condition); + if (notify_guard_conditions_.count(weak_guard_condition) != 0) { + notify_guard_conditions_.erase(weak_guard_condition); + auto guard_condition = weak_guard_condition.lock(); + // If this notify waitable doesn't have an on_ready_callback, then there's nothing to unset + if (guard_condition && on_ready_callback_) { + guard_condition->set_on_trigger_callback(nullptr); + } } } diff --git a/rclcpp/src/rclcpp/executors/multi_threaded_executor.cpp b/rclcpp/src/rclcpp/executors/multi_threaded_executor.cpp index bb477690be..fb4e345098 100644 --- a/rclcpp/src/rclcpp/executors/multi_threaded_executor.cpp +++ b/rclcpp/src/rclcpp/executors/multi_threaded_executor.cpp @@ -48,7 +48,7 @@ MultiThreadedExecutor::spin() if (spinning.exchange(true)) { throw std::runtime_error("spin() called while already spinning"); } - RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); + RCPPUTILS_SCOPE_EXIT(this->spinning.store(false);); std::vector threads; size_t thread_id = 0; { @@ -92,6 +92,18 @@ MultiThreadedExecutor::run(size_t this_thread_number) execute_any_executable(any_exec); + if (any_exec.callback_group && + any_exec.callback_group->type() == CallbackGroupType::MutuallyExclusive) + { + try { + interrupt_guard_condition_->trigger(); + } catch (const rclcpp::exceptions::RCLError & ex) { + throw std::runtime_error( + std::string( + "Failed to trigger guard condition on callback group change: ") + ex.what()); + } + } + // Clear the callback_group to prevent the AnyExecutable destructor from // resetting the callback group `can_be_taken_from` 
any_exec.callback_group.reset(); diff --git a/rclcpp/src/rclcpp/executors/single_threaded_executor.cpp b/rclcpp/src/rclcpp/executors/single_threaded_executor.cpp index e7f311c147..975733b497 100644 --- a/rclcpp/src/rclcpp/executors/single_threaded_executor.cpp +++ b/rclcpp/src/rclcpp/executors/single_threaded_executor.cpp @@ -31,6 +31,11 @@ SingleThreadedExecutor::spin() throw std::runtime_error("spin() called while already spinning"); } RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); + + // Clear any previous result and rebuild the waitset + this->wait_result_.reset(); + this->entities_need_rebuild_ = true; + while (rclcpp::ok(this->context_) && spinning.load()) { rclcpp::AnyExecutable any_executable; if (get_next_executable(any_executable)) { diff --git a/rclcpp/src/rclcpp/executors/static_executor_entities_collector.cpp b/rclcpp/src/rclcpp/executors/static_executor_entities_collector.cpp deleted file mode 100644 index 6fd0b56a85..0000000000 --- a/rclcpp/src/rclcpp/executors/static_executor_entities_collector.cpp +++ /dev/null @@ -1,524 +0,0 @@ -// Copyright 2020 Open Source Robotics Foundation, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "rclcpp/executors/static_executor_entities_collector.hpp" - -#include -#include -#include -#include -#include -#include - -#include "rclcpp/memory_strategy.hpp" -#include "rclcpp/executors/static_single_threaded_executor.hpp" -#include "rclcpp/detail/add_guard_condition_to_rcl_wait_set.hpp" - -using rclcpp::executors::StaticExecutorEntitiesCollector; - -StaticExecutorEntitiesCollector::~StaticExecutorEntitiesCollector() -{ - // Disassociate all callback groups and thus nodes. 
- for (const auto & pair : weak_groups_associated_with_executor_to_nodes_) { - auto group = pair.first.lock(); - if (group) { - std::atomic_bool & has_executor = group->get_associated_with_executor_atomic(); - has_executor.store(false); - } - } - for (const auto & pair : weak_groups_to_nodes_associated_with_executor_) { - auto group = pair.first.lock(); - if (group) { - std::atomic_bool & has_executor = group->get_associated_with_executor_atomic(); - has_executor.store(false); - } - } - // Disassociate all nodes - for (const auto & weak_node : weak_nodes_) { - auto node = weak_node.lock(); - if (node) { - std::atomic_bool & has_executor = node->get_associated_with_executor_atomic(); - has_executor.store(false); - } - } - weak_groups_associated_with_executor_to_nodes_.clear(); - weak_groups_to_nodes_associated_with_executor_.clear(); - exec_list_.clear(); - weak_nodes_.clear(); - weak_nodes_to_guard_conditions_.clear(); -} - -void -StaticExecutorEntitiesCollector::init( - rcl_wait_set_t * p_wait_set, - rclcpp::memory_strategy::MemoryStrategy::SharedPtr memory_strategy) -{ - // Empty initialize executable list - exec_list_ = rclcpp::experimental::ExecutableList(); - // Get executor's wait_set_ pointer - p_wait_set_ = p_wait_set; - // Get executor's memory strategy ptr - if (memory_strategy == nullptr) { - throw std::runtime_error("Received NULL memory strategy in executor waitable."); - } - memory_strategy_ = memory_strategy; - - // Get memory strategy and executable list. Prepare wait_set_ - std::shared_ptr shared_ptr; - execute(shared_ptr); - - // The entities collector is now initialized - initialized_ = true; -} - -void -StaticExecutorEntitiesCollector::fini() -{ - memory_strategy_->clear_handles(); - exec_list_.clear(); -} - -std::shared_ptr -StaticExecutorEntitiesCollector::take_data() -{ - return nullptr; -} - -void -StaticExecutorEntitiesCollector::execute(std::shared_ptr & data) -{ - (void) data; - // Fill memory strategy with entities coming from weak_nodes_ - fill_memory_strategy(); - // Fill exec_list_ with entities coming from weak_nodes_ (same as memory strategy) - fill_executable_list(); - // Resize the wait_set_ based on memory_strategy handles (rcl_wait_set_resize) - prepare_wait_set(); - // Add new nodes guard conditions to map - std::lock_guard guard{new_nodes_mutex_}; - for (const auto & weak_node : new_nodes_) { - if (auto node_ptr = weak_node.lock()) { - weak_nodes_to_guard_conditions_[node_ptr] = - node_ptr->get_shared_notify_guard_condition().get(); - } - } - new_nodes_.clear(); -} - -void -StaticExecutorEntitiesCollector::fill_memory_strategy() -{ - memory_strategy_->clear_handles(); - bool has_invalid_weak_groups_or_nodes = - memory_strategy_->collect_entities(weak_groups_to_nodes_associated_with_executor_); - // Clean up any invalid nodes, if they were detected - if (has_invalid_weak_groups_or_nodes) { - std::vector invalid_group_ptrs; - for (const auto & pair : weak_groups_to_nodes_associated_with_executor_) { - auto & weak_group_ptr = pair.first; - auto & weak_node_ptr = pair.second; - if (weak_group_ptr.expired() || weak_node_ptr.expired()) { - invalid_group_ptrs.push_back(weak_group_ptr); - } - } - std::for_each( - invalid_group_ptrs.begin(), invalid_group_ptrs.end(), - [this](rclcpp::CallbackGroup::WeakPtr group_ptr) { - weak_groups_to_nodes_associated_with_executor_.erase(group_ptr); - }); - } - has_invalid_weak_groups_or_nodes = - memory_strategy_->collect_entities(weak_groups_associated_with_executor_to_nodes_); - // Clean up any invalid nodes, if they were 
detected - if (has_invalid_weak_groups_or_nodes) { - std::vector invalid_group_ptrs; - for (const auto & pair : weak_groups_associated_with_executor_to_nodes_) { - auto & weak_group_ptr = pair.first; - const auto & weak_node_ptr = pair.second; - if (weak_group_ptr.expired() || weak_node_ptr.expired()) { - invalid_group_ptrs.push_back(weak_group_ptr); - } - } - std::for_each( - invalid_group_ptrs.begin(), invalid_group_ptrs.end(), - [this](rclcpp::CallbackGroup::WeakPtr group_ptr) { - weak_groups_associated_with_executor_to_nodes_.erase(group_ptr); - }); - } - - // Add the static executor waitable to the memory strategy - memory_strategy_->add_waitable_handle(this->shared_from_this()); -} - -void -StaticExecutorEntitiesCollector::fill_executable_list() -{ - exec_list_.clear(); - add_callback_groups_from_nodes_associated_to_executor(); - fill_executable_list_from_map(weak_groups_associated_with_executor_to_nodes_); - fill_executable_list_from_map(weak_groups_to_nodes_associated_with_executor_); - // Add the executor's waitable to the executable list - exec_list_.add_waitable(shared_from_this()); -} -void -StaticExecutorEntitiesCollector::fill_executable_list_from_map( - const rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & - weak_groups_to_nodes) -{ - for (const auto & pair : weak_groups_to_nodes) { - auto group = pair.first.lock(); - auto node = pair.second.lock(); - if (!node || !group || !group->can_be_taken_from().load()) { - continue; - } - group->find_timer_ptrs_if( - [this](const rclcpp::TimerBase::SharedPtr & timer) { - if (timer) { - exec_list_.add_timer(timer); - } - return false; - }); - group->find_subscription_ptrs_if( - [this](const rclcpp::SubscriptionBase::SharedPtr & subscription) { - if (subscription) { - exec_list_.add_subscription(subscription); - } - return false; - }); - group->find_service_ptrs_if( - [this](const rclcpp::ServiceBase::SharedPtr & service) { - if (service) { - exec_list_.add_service(service); - } - return false; - }); - group->find_client_ptrs_if( - [this](const rclcpp::ClientBase::SharedPtr & client) { - if (client) { - exec_list_.add_client(client); - } - return false; - }); - group->find_waitable_ptrs_if( - [this](const rclcpp::Waitable::SharedPtr & waitable) { - if (waitable) { - exec_list_.add_waitable(waitable); - } - return false; - }); - } -} - -void -StaticExecutorEntitiesCollector::prepare_wait_set() -{ - // clear wait set - if (rcl_wait_set_clear(p_wait_set_) != RCL_RET_OK) { - throw std::runtime_error("Couldn't clear wait set"); - } - - // The size of waitables are accounted for in size of the other entities - rcl_ret_t ret = rcl_wait_set_resize( - p_wait_set_, memory_strategy_->number_of_ready_subscriptions(), - memory_strategy_->number_of_guard_conditions(), memory_strategy_->number_of_ready_timers(), - memory_strategy_->number_of_ready_clients(), memory_strategy_->number_of_ready_services(), - memory_strategy_->number_of_ready_events()); - - if (RCL_RET_OK != ret) { - throw std::runtime_error( - std::string("Couldn't resize the wait set: ") + rcl_get_error_string().str); - } -} - -void -StaticExecutorEntitiesCollector::refresh_wait_set(std::chrono::nanoseconds timeout) -{ - // clear wait set (memset to '0' all wait_set_ entities - // but keeps the wait_set_ number of entities) - if (rcl_wait_set_clear(p_wait_set_) != RCL_RET_OK) { - throw std::runtime_error("Couldn't clear wait set"); - } - - if (!memory_strategy_->add_handles_to_wait_set(p_wait_set_)) { - throw std::runtime_error("Couldn't fill wait set"); - } - - 
rcl_ret_t status = - rcl_wait(p_wait_set_, std::chrono::duration_cast(timeout).count()); - - if (status == RCL_RET_WAIT_SET_EMPTY) { - RCUTILS_LOG_WARN_NAMED( - "rclcpp", - "empty wait set received in rcl_wait(). This should never happen."); - } else if (status != RCL_RET_OK && status != RCL_RET_TIMEOUT) { - using rclcpp::exceptions::throw_from_rcl_error; - throw_from_rcl_error(status, "rcl_wait() failed"); - } -} - -void -StaticExecutorEntitiesCollector::add_to_wait_set(rcl_wait_set_t * wait_set) -{ - // Add waitable guard conditions (one for each registered node) into the wait set. - for (const auto & pair : weak_nodes_to_guard_conditions_) { - auto & gc = pair.second; - detail::add_guard_condition_to_rcl_wait_set(*wait_set, *gc); - } -} - -size_t StaticExecutorEntitiesCollector::get_number_of_ready_guard_conditions() -{ - std::lock_guard guard{new_nodes_mutex_}; - return weak_nodes_to_guard_conditions_.size() + new_nodes_.size(); -} - -bool -StaticExecutorEntitiesCollector::add_node( - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr) -{ - bool is_new_node = false; - // If the node already has an executor - std::atomic_bool & has_executor = node_ptr->get_associated_with_executor_atomic(); - if (has_executor.exchange(true)) { - throw std::runtime_error("Node has already been added to an executor."); - } - node_ptr->for_each_callback_group( - [this, node_ptr, &is_new_node](rclcpp::CallbackGroup::SharedPtr group_ptr) - { - if ( - !group_ptr->get_associated_with_executor_atomic().load() && - group_ptr->automatically_add_to_executor_with_node()) - { - is_new_node = (add_callback_group( - group_ptr, - node_ptr, - weak_groups_to_nodes_associated_with_executor_) || - is_new_node); - } - }); - weak_nodes_.push_back(node_ptr); - return is_new_node; -} - -bool -StaticExecutorEntitiesCollector::add_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & weak_groups_to_nodes) -{ - // If the callback_group already has an executor - std::atomic_bool & has_executor = group_ptr->get_associated_with_executor_atomic(); - if (has_executor.exchange(true)) { - throw std::runtime_error("Callback group has already been added to an executor."); - } - bool is_new_node = !has_node(node_ptr, weak_groups_associated_with_executor_to_nodes_) && - !has_node(node_ptr, weak_groups_to_nodes_associated_with_executor_); - rclcpp::CallbackGroup::WeakPtr weak_group_ptr = group_ptr; - auto insert_info = weak_groups_to_nodes.insert( - std::make_pair(weak_group_ptr, node_ptr)); - bool was_inserted = insert_info.second; - if (!was_inserted) { - throw std::runtime_error("Callback group was already added to executor."); - } - if (is_new_node) { - std::lock_guard guard{new_nodes_mutex_}; - new_nodes_.push_back(node_ptr); - return true; - } - return false; -} - -bool -StaticExecutorEntitiesCollector::add_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr) -{ - return add_callback_group(group_ptr, node_ptr, weak_groups_associated_with_executor_to_nodes_); -} - -bool -StaticExecutorEntitiesCollector::remove_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr) -{ - return this->remove_callback_group_from_map( - group_ptr, - weak_groups_associated_with_executor_to_nodes_); -} - -bool -StaticExecutorEntitiesCollector::remove_callback_group_from_map( - rclcpp::CallbackGroup::SharedPtr group_ptr, - 
rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & weak_groups_to_nodes) -{ - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr; - rclcpp::CallbackGroup::WeakPtr weak_group_ptr = group_ptr; - auto iter = weak_groups_to_nodes.find(weak_group_ptr); - if (iter != weak_groups_to_nodes.end()) { - node_ptr = iter->second.lock(); - if (node_ptr == nullptr) { - throw std::runtime_error("Node must not be deleted before its callback group(s)."); - } - weak_groups_to_nodes.erase(iter); - } else { - throw std::runtime_error("Callback group needs to be associated with executor."); - } - // If the node was matched and removed, interrupt waiting. - if (!has_node(node_ptr, weak_groups_associated_with_executor_to_nodes_) && - !has_node(node_ptr, weak_groups_to_nodes_associated_with_executor_)) - { - rclcpp::node_interfaces::NodeBaseInterface::WeakPtr node_weak_ptr(node_ptr); - weak_nodes_to_guard_conditions_.erase(node_weak_ptr); - return true; - } - return false; -} - -bool -StaticExecutorEntitiesCollector::remove_node( - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr) -{ - if (!node_ptr->get_associated_with_executor_atomic().load()) { - return false; - } - bool node_found = false; - auto node_it = weak_nodes_.begin(); - while (node_it != weak_nodes_.end()) { - bool matched = (node_it->lock() == node_ptr); - if (matched) { - weak_nodes_.erase(node_it); - node_found = true; - break; - } - ++node_it; - } - if (!node_found) { - return false; - } - std::vector found_group_ptrs; - std::for_each( - weak_groups_to_nodes_associated_with_executor_.begin(), - weak_groups_to_nodes_associated_with_executor_.end(), - [&found_group_ptrs, node_ptr](std::pair key_value_pair) { - auto & weak_node_ptr = key_value_pair.second; - auto shared_node_ptr = weak_node_ptr.lock(); - auto group_ptr = key_value_pair.first.lock(); - if (shared_node_ptr == node_ptr) { - found_group_ptrs.push_back(group_ptr); - } - }); - std::for_each( - found_group_ptrs.begin(), found_group_ptrs.end(), [this] - (rclcpp::CallbackGroup::SharedPtr group_ptr) { - this->remove_callback_group_from_map( - group_ptr, - weak_groups_to_nodes_associated_with_executor_); - }); - std::atomic_bool & has_executor = node_ptr->get_associated_with_executor_atomic(); - has_executor.store(false); - return true; -} - -bool -StaticExecutorEntitiesCollector::is_ready(rcl_wait_set_t * p_wait_set) -{ - // Check wait_set guard_conditions for added/removed entities to/from a node - for (size_t i = 0; i < p_wait_set->size_of_guard_conditions; ++i) { - if (p_wait_set->guard_conditions[i] != NULL) { - auto found_guard_condition = std::find_if( - weak_nodes_to_guard_conditions_.begin(), weak_nodes_to_guard_conditions_.end(), - [&](std::pair pair) -> bool { - const rcl_guard_condition_t & rcl_gc = pair.second->get_rcl_guard_condition(); - return &rcl_gc == p_wait_set->guard_conditions[i]; - }); - if (found_guard_condition != weak_nodes_to_guard_conditions_.end()) { - return true; - } - } - } - // None of the guard conditions triggered belong to a registered node - return false; -} - -// Returns true iff the weak_groups_to_nodes map has node_ptr as the value in any of its entry. 
-bool -StaticExecutorEntitiesCollector::has_node( - const rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - const rclcpp::memory_strategy::MemoryStrategy::WeakCallbackGroupsToNodesMap & - weak_groups_to_nodes) const -{ - return std::find_if( - weak_groups_to_nodes.begin(), - weak_groups_to_nodes.end(), - [&](const WeakCallbackGroupsToNodesMap::value_type & other) -> bool { - auto other_ptr = other.second.lock(); - return other_ptr == node_ptr; - }) != weak_groups_to_nodes.end(); -} - -void -StaticExecutorEntitiesCollector::add_callback_groups_from_nodes_associated_to_executor() -{ - for (const auto & weak_node : weak_nodes_) { - auto node = weak_node.lock(); - if (node) { - node->for_each_callback_group( - [this, node](rclcpp::CallbackGroup::SharedPtr shared_group_ptr) - { - if (shared_group_ptr->automatically_add_to_executor_with_node() && - !shared_group_ptr->get_associated_with_executor_atomic().load()) - { - add_callback_group( - shared_group_ptr, - node, - weak_groups_to_nodes_associated_with_executor_); - } - }); - } - } -} - -std::vector -StaticExecutorEntitiesCollector::get_all_callback_groups() -{ - std::vector groups; - for (const auto & group_node_ptr : weak_groups_associated_with_executor_to_nodes_) { - groups.push_back(group_node_ptr.first); - } - for (const auto & group_node_ptr : weak_groups_to_nodes_associated_with_executor_) { - groups.push_back(group_node_ptr.first); - } - return groups; -} - -std::vector -StaticExecutorEntitiesCollector::get_manually_added_callback_groups() -{ - std::vector groups; - for (const auto & group_node_ptr : weak_groups_associated_with_executor_to_nodes_) { - groups.push_back(group_node_ptr.first); - } - return groups; -} - -std::vector -StaticExecutorEntitiesCollector::get_automatically_added_callback_groups_from_nodes() -{ - std::vector groups; - for (const auto & group_node_ptr : weak_groups_to_nodes_associated_with_executor_) { - groups.push_back(group_node_ptr.first); - } - return groups; -} diff --git a/rclcpp/src/rclcpp/executors/static_single_threaded_executor.cpp b/rclcpp/src/rclcpp/executors/static_single_threaded_executor.cpp index 3c14b37b45..9602bdb2c9 100644 --- a/rclcpp/src/rclcpp/executors/static_single_threaded_executor.cpp +++ b/rclcpp/src/rclcpp/executors/static_single_threaded_executor.cpp @@ -12,31 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "rclcpp/executors/static_single_threaded_executor.hpp" - -#include -#include -#include -#include - +#include "rclcpp/executors/executor_entities_collection.hpp" #include "rcpputils/scope_exit.hpp" +#include "rclcpp/executors/static_single_threaded_executor.hpp" +#include "rclcpp/any_executable.hpp" + using rclcpp::executors::StaticSingleThreadedExecutor; -using rclcpp::experimental::ExecutableList; -StaticSingleThreadedExecutor::StaticSingleThreadedExecutor( - const rclcpp::ExecutorOptions & options) +StaticSingleThreadedExecutor::StaticSingleThreadedExecutor(const rclcpp::ExecutorOptions & options) : rclcpp::Executor(options) { - entities_collector_ = std::make_shared(); } -StaticSingleThreadedExecutor::~StaticSingleThreadedExecutor() -{ - if (entities_collector_->is_init()) { - entities_collector_->fini(); - } -} +StaticSingleThreadedExecutor::~StaticSingleThreadedExecutor() {} void StaticSingleThreadedExecutor::spin() @@ -46,14 +35,11 @@ StaticSingleThreadedExecutor::spin() } RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); - // Set memory_strategy_ and exec_list_ based on weak_nodes_ - // Prepare wait_set_ based on memory_strategy_ - entities_collector_->init(&wait_set_, memory_strategy_); - + // This is essentially the contents of the rclcpp::Executor::wait_for_work method, + // except we need to keep the wait result to reproduce the StaticSingleThreadedExecutor + // behavior. while (rclcpp::ok(this->context_) && spinning.load()) { - // Refresh wait set and wait for work - entities_collector_->refresh_wait_set(); - execute_ready_executables(); + this->spin_once_impl(std::chrono::nanoseconds(-1)); } } @@ -64,7 +50,6 @@ StaticSingleThreadedExecutor::spin_some(std::chrono::nanoseconds max_duration) if (std::chrono::nanoseconds(0) == max_duration) { max_duration = std::chrono::nanoseconds::max(); } - return this->spin_some_impl(max_duration, false); } @@ -80,36 +65,32 @@ StaticSingleThreadedExecutor::spin_all(std::chrono::nanoseconds max_duration) void StaticSingleThreadedExecutor::spin_some_impl(std::chrono::nanoseconds max_duration, bool exhaustive) { - // Make sure the entities collector has been initialized - if (!entities_collector_->is_init()) { - entities_collector_->init(&wait_set_, memory_strategy_); - } - auto start = std::chrono::steady_clock::now(); auto max_duration_not_elapsed = [max_duration, start]() { - if (std::chrono::nanoseconds(0) == max_duration) { - // told to spin forever if need be - return true; - } else if (std::chrono::steady_clock::now() - start < max_duration) { - // told to spin only for some maximum amount of time - return true; - } - // spun too long - return false; + const auto spin_forever = std::chrono::nanoseconds(0) == max_duration; + const auto cur_duration = std::chrono::steady_clock::now() - start; + return spin_forever || (cur_duration < max_duration); }; if (spinning.exchange(true)) { throw std::runtime_error("spin_some() called while already spinning"); } - RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); + RCPPUTILS_SCOPE_EXIT(this->spinning.store(false);); while (rclcpp::ok(context_) && spinning.load() && max_duration_not_elapsed()) { // Get executables that are ready now - entities_collector_->refresh_wait_set(std::chrono::milliseconds::zero()); - // Execute ready executables - bool work_available = execute_ready_executables(); - if (!work_available || !exhaustive) { - break; + std::lock_guard guard(mutex_); + + auto wait_result = this->collect_and_wait(std::chrono::nanoseconds(0)); + if (wait_result.has_value()) { + // 
Execute ready executables + bool work_available = this->execute_ready_executables( + current_collection_, + wait_result.value(), + false); + if (!work_available || !exhaustive) { + break; + } } } } @@ -117,163 +98,99 @@ StaticSingleThreadedExecutor::spin_some_impl(std::chrono::nanoseconds max_durati void StaticSingleThreadedExecutor::spin_once_impl(std::chrono::nanoseconds timeout) { - // Make sure the entities collector has been initialized - if (!entities_collector_->is_init()) { - entities_collector_->init(&wait_set_, memory_strategy_); - } - if (rclcpp::ok(context_) && spinning.load()) { - // Wait until we have a ready entity or timeout expired - entities_collector_->refresh_wait_set(timeout); - // Execute ready executables - execute_ready_executables(true); + std::lock_guard guard(mutex_); + auto wait_result = this->collect_and_wait(timeout); + if (wait_result.has_value()) { + this->execute_ready_executables(current_collection_, wait_result.value(), true); + } } } -void -StaticSingleThreadedExecutor::add_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, - bool notify) +std::optional> +StaticSingleThreadedExecutor::collect_and_wait(std::chrono::nanoseconds timeout) { - bool is_new_node = entities_collector_->add_callback_group(group_ptr, node_ptr); - if (is_new_node && notify) { - // Interrupt waiting to handle new node - interrupt_guard_condition_->trigger(); + if (current_collection_.empty() || this->entities_need_rebuild_.load()) { + this->collect_entities(); } -} - -void -StaticSingleThreadedExecutor::add_node( - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, bool notify) -{ - bool is_new_node = entities_collector_->add_node(node_ptr); - if (is_new_node && notify) { - // Interrupt waiting to handle new node - interrupt_guard_condition_->trigger(); + auto wait_result = wait_set_.wait(std::chrono::nanoseconds(timeout)); + if (wait_result.kind() == WaitResultKind::Empty) { + RCUTILS_LOG_WARN_NAMED( + "rclcpp", + "empty wait set received in wait(). This should never happen."); + return {}; } + return wait_result; } -void -StaticSingleThreadedExecutor::add_node(std::shared_ptr node_ptr, bool notify) -{ - this->add_node(node_ptr->get_node_base_interface(), notify); -} - -void -StaticSingleThreadedExecutor::remove_callback_group( - rclcpp::CallbackGroup::SharedPtr group_ptr, bool notify) +// This preserves the "scheduling semantics" of the StaticSingleThreadedExecutor +// from the original implementation. 
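+// Ready entities are drained in a fixed order (subscriptions, timers, clients,
+// services, waitables); when spin_once is true, the method returns as soon as
+// one entity has been executed.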
+bool StaticSingleThreadedExecutor::execute_ready_executables( + const rclcpp::executors::ExecutorEntitiesCollection & collection, + rclcpp::WaitResult & wait_result, + bool spin_once) { - bool node_removed = entities_collector_->remove_callback_group(group_ptr); - // If the node was matched and removed, interrupt waiting - if (node_removed && notify) { - interrupt_guard_condition_->trigger(); + bool any_ready_executable = false; + if (wait_result.kind() != rclcpp::WaitResultKind::Ready) { + return any_ready_executable; } -} -void -StaticSingleThreadedExecutor::remove_node( - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, bool notify) -{ - bool node_removed = entities_collector_->remove_node(node_ptr); - if (!node_removed) { - throw std::runtime_error("Node needs to be associated with this executor."); - } - // If the node was matched and removed, interrupt waiting - if (notify) { - interrupt_guard_condition_->trigger(); + while (auto subscription = wait_result.next_ready_subscription()) { + auto entity_iter = collection.subscriptions.find(subscription->get_subscription_handle().get()); + if (entity_iter != collection.subscriptions.end()) { + execute_subscription(subscription); + any_ready_executable = true; + if (spin_once) {return any_ready_executable;} + } } -} - -std::vector -StaticSingleThreadedExecutor::get_all_callback_groups() -{ - return entities_collector_->get_all_callback_groups(); -} - -std::vector -StaticSingleThreadedExecutor::get_manually_added_callback_groups() -{ - return entities_collector_->get_manually_added_callback_groups(); -} - -std::vector -StaticSingleThreadedExecutor::get_automatically_added_callback_groups_from_nodes() -{ - return entities_collector_->get_automatically_added_callback_groups_from_nodes(); -} - -void -StaticSingleThreadedExecutor::remove_node(std::shared_ptr node_ptr, bool notify) -{ - this->remove_node(node_ptr->get_node_base_interface(), notify); -} -bool -StaticSingleThreadedExecutor::execute_ready_executables(bool spin_once) -{ - bool any_ready_executable = false; - - // Execute all the ready subscriptions - for (size_t i = 0; i < wait_set_.size_of_subscriptions; ++i) { - if (i < entities_collector_->get_number_of_subscriptions()) { - if (wait_set_.subscriptions[i]) { - execute_subscription(entities_collector_->get_subscription(i)); - if (spin_once) { - return true; - } - any_ready_executable = true; - } + size_t current_timer_index = 0; + while (true) { + auto [timer, timer_index] = wait_result.peek_next_ready_timer(current_timer_index); + if (nullptr == timer) { + break; } - } - // Execute all the ready timers - for (size_t i = 0; i < wait_set_.size_of_timers; ++i) { - if (i < entities_collector_->get_number_of_timers()) { - if (wait_set_.timers[i] && entities_collector_->get_timer(i)->is_ready()) { - auto timer = entities_collector_->get_timer(i); - timer->call(); - execute_timer(std::move(timer)); - if (spin_once) { - return true; - } - any_ready_executable = true; + current_timer_index = timer_index; + auto entity_iter = collection.timers.find(timer->get_timer_handle().get()); + if (entity_iter != collection.timers.end()) { + wait_result.clear_timer_with_index(current_timer_index); + auto data = timer->call(); + if (!data) { + // someone canceled the timer between is_ready and call + continue; } + + execute_timer(std::move(timer), data); + any_ready_executable = true; + if (spin_once) {return any_ready_executable;} } } - // Execute all the ready services - for (size_t i = 0; i < wait_set_.size_of_services; ++i) { - if (i 
< entities_collector_->get_number_of_services()) { - if (wait_set_.services[i]) { - execute_service(entities_collector_->get_service(i)); - if (spin_once) { - return true; - } - any_ready_executable = true; - } + + while (auto client = wait_result.next_ready_client()) { + auto entity_iter = collection.clients.find(client->get_client_handle().get()); + if (entity_iter != collection.clients.end()) { + execute_client(client); + any_ready_executable = true; + if (spin_once) {return any_ready_executable;} } } - // Execute all the ready clients - for (size_t i = 0; i < wait_set_.size_of_clients; ++i) { - if (i < entities_collector_->get_number_of_clients()) { - if (wait_set_.clients[i]) { - execute_client(entities_collector_->get_client(i)); - if (spin_once) { - return true; - } - any_ready_executable = true; - } + + while (auto service = wait_result.next_ready_service()) { + auto entity_iter = collection.services.find(service->get_service_handle().get()); + if (entity_iter != collection.services.end()) { + execute_service(service); + any_ready_executable = true; + if (spin_once) {return any_ready_executable;} } } - // Execute all the ready waitables - for (size_t i = 0; i < entities_collector_->get_number_of_waitables(); ++i) { - auto waitable = entities_collector_->get_waitable(i); - if (waitable->is_ready(&wait_set_)) { + + while (auto waitable = wait_result.next_ready_waitable()) { + auto entity_iter = collection.waitables.find(waitable.get()); + if (entity_iter != collection.waitables.end()) { auto data = waitable->take_data(); waitable->execute(data); - if (spin_once) { - return true; - } any_ready_executable = true; + if (spin_once) {return any_ready_executable;} } } return any_ready_executable; diff --git a/rclcpp/src/rclcpp/experimental/executors/events_executor/events_executor.cpp b/rclcpp/src/rclcpp/experimental/executors/events_executor/events_executor.cpp new file mode 100644 index 0000000000..7eafc0d94d --- /dev/null +++ b/rclcpp/src/rclcpp/experimental/executors/events_executor/events_executor.cpp @@ -0,0 +1,521 @@ +// Copyright 2023 iRobot Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "rclcpp/experimental/executors/events_executor/events_executor.hpp" + +#include +#include +#include + +#include "rcpputils/scope_exit.hpp" + +using namespace std::chrono_literals; + +using rclcpp::experimental::executors::EventsExecutor; + +EventsExecutor::EventsExecutor( + rclcpp::experimental::executors::EventsQueue::UniquePtr events_queue, + bool execute_timers_separate_thread, + const rclcpp::ExecutorOptions & options) +: rclcpp::Executor(options) +{ + // Get ownership of the queue used to store events. 
+  if (!events_queue) {
+    throw std::invalid_argument("events_queue can't be a null pointer");
+  }
+  events_queue_ = std::move(events_queue);
+
+  // Create timers manager
+  // The timers manager can be used either to only track timers (in this case an expired
+  // timer will generate an executor event and then it will be executed by the executor thread)
+  // or it can also take care of executing expired timers in its dedicated thread.
+  std::function<void(const rclcpp::TimerBase *,
+    const std::shared_ptr<void> &)> timer_on_ready_cb = nullptr;
+  if (!execute_timers_separate_thread) {
+    timer_on_ready_cb =
+      [this](const rclcpp::TimerBase * timer_id, const std::shared_ptr<void> & data) {
+        ExecutorEvent event = {timer_id, data, -1, ExecutorEventType::TIMER_EVENT, 1};
+        this->events_queue_->enqueue(event);
+      };
+  }
+  timers_manager_ =
+    std::make_shared<rclcpp::experimental::TimersManager>(context_, timer_on_ready_cb);
+
+  this->current_entities_collection_ =
+    std::make_shared<rclcpp::executors::ExecutorEntitiesCollection>();
+
+  notify_waitable_ = std::make_shared<rclcpp::executors::ExecutorNotifyWaitable>(
+    [this]() {
+      // This callback is invoked when:
+      // - the interrupt or shutdown guard condition is triggered:
+      //   ---> we need to wake up the executor so that it can terminate
+      // - a node or callback group guard condition is triggered:
+      //   ---> the entities collection is changed, we need to update callbacks
+      notify_waitable_event_pushed_ = false;
+      this->refresh_current_collection_from_callback_groups();
+    });
+
+  // Make sure that the notify waitable is immediately added to the collection
+  // to avoid missing events
+  this->add_notify_waitable_to_collection(current_entities_collection_->waitables);
+
+  notify_waitable_->add_guard_condition(interrupt_guard_condition_);
+  notify_waitable_->add_guard_condition(shutdown_guard_condition_);
+
+  notify_waitable_->set_on_ready_callback(
+    this->create_waitable_callback(notify_waitable_.get()));
+
+  auto notify_waitable_entity_id = notify_waitable_.get();
+  notify_waitable_->set_on_ready_callback(
+    [this, notify_waitable_entity_id](size_t num_events, int waitable_data) {
+      // The notify waitable has a special callback.
+      // We don't care about how many events as when we wake up the executor we are going to
+      // process everything regardless.
+      // For the same reason, if an event of this type has already been pushed but it has not been
+      // processed yet, we avoid pushing additional events.
+ (void)num_events; + if (notify_waitable_event_pushed_.exchange(true)) { + return; + } + + ExecutorEvent event = + {notify_waitable_entity_id, nullptr, waitable_data, ExecutorEventType::WAITABLE_EVENT, 1}; + this->events_queue_->enqueue(event); + }); + + this->entities_collector_ = + std::make_shared(notify_waitable_); +} + +EventsExecutor::~EventsExecutor() +{ + spinning.store(false); + notify_waitable_->clear_on_ready_callback(); + this->refresh_current_collection({}); +} + +void +EventsExecutor::spin() +{ + if (spinning.exchange(true)) { + throw std::runtime_error("spin() called while already spinning"); + } + RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); + + timers_manager_->start(); + RCPPUTILS_SCOPE_EXIT(timers_manager_->stop(); ); + + while (rclcpp::ok(context_) && spinning.load()) { + // Wait until we get an event + ExecutorEvent event; + bool has_event = events_queue_->dequeue(event); + if (has_event) { + this->execute_event(event); + } + } +} + +void +EventsExecutor::spin_some(std::chrono::nanoseconds max_duration) +{ + return this->spin_some_impl(max_duration, false); +} + +void +EventsExecutor::spin_all(std::chrono::nanoseconds max_duration) +{ + if (max_duration <= 0ns) { + throw std::invalid_argument("max_duration must be positive"); + } + return this->spin_some_impl(max_duration, true); +} + +void +EventsExecutor::spin_some_impl(std::chrono::nanoseconds max_duration, bool exhaustive) +{ + if (spinning.exchange(true)) { + throw std::runtime_error("spin_some() called while already spinning"); + } + + RCPPUTILS_SCOPE_EXIT(this->spinning.store(false); ); + + auto start = std::chrono::steady_clock::now(); + + auto max_duration_not_elapsed = [max_duration, start]() { + if (std::chrono::nanoseconds(0) == max_duration) { + // told to spin forever if need be + return true; + } else if (std::chrono::steady_clock::now() - start < max_duration) { + // told to spin only for some maximum amount of time + return true; + } + // spun too long + return false; + }; + + // Get the number of events and timers ready at start + const size_t ready_events_at_start = events_queue_->size(); + size_t executed_events = 0; + const size_t ready_timers_at_start = timers_manager_->get_number_ready_timers(); + size_t executed_timers = 0; + + while (rclcpp::ok(context_) && spinning.load() && max_duration_not_elapsed()) { + // Execute first ready event from queue if exists + if (exhaustive || (executed_events < ready_events_at_start)) { + bool has_event = !events_queue_->empty(); + + if (has_event) { + ExecutorEvent event; + bool ret = events_queue_->dequeue(event, std::chrono::nanoseconds(0)); + if (ret) { + this->execute_event(event); + executed_events++; + continue; + } + } + } + + // Execute first timer if it is ready + if (exhaustive || (executed_timers < ready_timers_at_start)) { + bool timer_executed = timers_manager_->execute_head_timer(); + if (timer_executed) { + executed_timers++; + continue; + } + } + + // If there's no more work available, exit + break; + } +} + +void +EventsExecutor::spin_once_impl(std::chrono::nanoseconds timeout) +{ + // In this context a negative input timeout means no timeout + if (timeout < 0ns) { + timeout = std::chrono::nanoseconds::max(); + } + + // Select the smallest between input timeout and timer timeout. + // Cancelled timers are not considered. 
+ bool is_timer_timeout = false; + auto next_timer_timeout = timers_manager_->get_head_timeout(); + if (next_timer_timeout.has_value() && next_timer_timeout.value() < timeout) { + timeout = next_timer_timeout.value(); + is_timer_timeout = true; + } + + ExecutorEvent event; + bool has_event = events_queue_->dequeue(event, timeout); + + // If we wake up from the wait with an event, it means that it + // arrived before any of the timers expired. + if (has_event) { + this->execute_event(event); + } else if (is_timer_timeout) { + timers_manager_->execute_head_timer(); + } +} + +void +EventsExecutor::add_node( + rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, bool notify) +{ + // This field is unused because we don't have to wake up the executor when a node is added. + (void) notify; + + // Add node to entities collector + this->entities_collector_->add_node(node_ptr); + + this->refresh_current_collection_from_callback_groups(); +} + +void +EventsExecutor::add_node(std::shared_ptr node_ptr, bool notify) +{ + this->add_node(node_ptr->get_node_base_interface(), notify); +} + +void +EventsExecutor::remove_node( + rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, bool notify) +{ + // This field is unused because we don't have to wake up the executor when a node is removed. + (void)notify; + + // Remove node from entities collector. + // This will result in un-setting all the event callbacks from its entities. + // After this function returns, this executor will not receive any more events associated + // to these entities. + this->entities_collector_->remove_node(node_ptr); + + this->refresh_current_collection_from_callback_groups(); +} + +void +EventsExecutor::remove_node(std::shared_ptr node_ptr, bool notify) +{ + this->remove_node(node_ptr->get_node_base_interface(), notify); +} + +void +EventsExecutor::execute_event(const ExecutorEvent & event) +{ + switch (event.type) { + case ExecutorEventType::CLIENT_EVENT: + { + rclcpp::ClientBase::SharedPtr client; + { + std::lock_guard lock(collection_mutex_); + client = this->retrieve_entity( + static_cast(event.entity_key), + current_entities_collection_->clients); + } + if (client) { + for (size_t i = 0; i < event.num_events; i++) { + execute_client(client); + } + } + + break; + } + case ExecutorEventType::SUBSCRIPTION_EVENT: + { + rclcpp::SubscriptionBase::SharedPtr subscription; + { + std::lock_guard lock(collection_mutex_); + subscription = this->retrieve_entity( + static_cast(event.entity_key), + current_entities_collection_->subscriptions); + } + if (subscription) { + for (size_t i = 0; i < event.num_events; i++) { + execute_subscription(subscription); + } + } + break; + } + case ExecutorEventType::SERVICE_EVENT: + { + rclcpp::ServiceBase::SharedPtr service; + { + std::lock_guard lock(collection_mutex_); + service = this->retrieve_entity( + static_cast(event.entity_key), + current_entities_collection_->services); + } + if (service) { + for (size_t i = 0; i < event.num_events; i++) { + execute_service(service); + } + } + + break; + } + case ExecutorEventType::TIMER_EVENT: + { + timers_manager_->execute_ready_timer( + static_cast(event.entity_key), event.data); + break; + } + case ExecutorEventType::WAITABLE_EVENT: + { + rclcpp::Waitable::SharedPtr waitable; + { + std::lock_guard lock(collection_mutex_); + waitable = this->retrieve_entity( + static_cast(event.entity_key), + current_entities_collection_->waitables); + } + if (waitable) { + for (size_t i = 0; i < event.num_events; i++) { + auto data = 
waitable->take_data_by_entity_id(event.waitable_data); + waitable->execute(data); + } + } + break; + } + } +} + +void +EventsExecutor::add_callback_group( + rclcpp::CallbackGroup::SharedPtr group_ptr, + rclcpp::node_interfaces::NodeBaseInterface::SharedPtr node_ptr, + bool notify) +{ + // This field is unused because we don't have to wake up + // the executor when a callback group is added. + (void)notify; + (void)node_ptr; + + this->entities_collector_->add_callback_group(group_ptr); + + this->refresh_current_collection_from_callback_groups(); +} + +void +EventsExecutor::remove_callback_group( + rclcpp::CallbackGroup::SharedPtr group_ptr, bool notify) +{ + // This field is unused because we don't have to wake up + // the executor when a callback group is removed. + (void)notify; + + this->entities_collector_->remove_callback_group(group_ptr); + + this->refresh_current_collection_from_callback_groups(); +} + +std::vector +EventsExecutor::get_all_callback_groups() +{ + this->entities_collector_->update_collections(); + return this->entities_collector_->get_all_callback_groups(); +} + +std::vector +EventsExecutor::get_manually_added_callback_groups() +{ + this->entities_collector_->update_collections(); + return this->entities_collector_->get_manually_added_callback_groups(); +} + +std::vector +EventsExecutor::get_automatically_added_callback_groups_from_nodes() +{ + this->entities_collector_->update_collections(); + return this->entities_collector_->get_automatically_added_callback_groups(); +} + +void +EventsExecutor::refresh_current_collection_from_callback_groups() +{ + // Build the new collection + this->entities_collector_->update_collections(); + auto callback_groups = this->entities_collector_->get_all_callback_groups(); + rclcpp::executors::ExecutorEntitiesCollection new_collection; + rclcpp::executors::build_entities_collection(callback_groups, new_collection); + + // TODO(alsora): this may be implemented in a better way. + // We need the notify waitable to be included in the executor "current_collection" + // because we need to be able to retrieve events for it. + // We could explicitly check for the notify waitable ID when we receive a waitable event + // but I think that it's better if the waitable was in the collection and it could be + // retrieved in the "standard" way. + // To do it, we need to add the notify waitable as an entry in both the new and + // current collections such that it's neither added or removed. 
+  this->add_notify_waitable_to_collection(new_collection.waitables);
+
+  // Acquire lock before modifying the current collection
+  std::lock_guard<std::recursive_mutex> lock(collection_mutex_);
+  this->add_notify_waitable_to_collection(current_entities_collection_->waitables);
+
+  this->refresh_current_collection(new_collection);
+}
+
+void
+EventsExecutor::refresh_current_collection(
+  const rclcpp::executors::ExecutorEntitiesCollection & new_collection)
+{
+  // Acquire lock before modifying the current collection
+  std::lock_guard<std::recursive_mutex> lock(collection_mutex_);
+
+  current_entities_collection_->remove_expired_entities();
+
+  current_entities_collection_->timers.update(
+    new_collection.timers,
+    [this](rclcpp::TimerBase::SharedPtr timer) {timers_manager_->add_timer(timer);},
+    [this](rclcpp::TimerBase::SharedPtr timer) {timers_manager_->remove_timer(timer);});
+
+  current_entities_collection_->subscriptions.update(
+    new_collection.subscriptions,
+    [this](auto subscription) {
+      subscription->set_on_new_message_callback(
+        this->create_entity_callback(
+          subscription->get_subscription_handle().get(), ExecutorEventType::SUBSCRIPTION_EVENT));
+    },
+    [](auto subscription) {subscription->clear_on_new_message_callback();});
+
+  current_entities_collection_->clients.update(
+    new_collection.clients,
+    [this](auto client) {
+      client->set_on_new_response_callback(
+        this->create_entity_callback(
+          client->get_client_handle().get(), ExecutorEventType::CLIENT_EVENT));
+    },
+    [](auto client) {client->clear_on_new_response_callback();});
+
+  current_entities_collection_->services.update(
+    new_collection.services,
+    [this](auto service) {
+      service->set_on_new_request_callback(
+        this->create_entity_callback(
+          service->get_service_handle().get(), ExecutorEventType::SERVICE_EVENT));
+    },
+    [](auto service) {service->clear_on_new_request_callback();});
+
+  // DO WE NEED THIS?
WE ARE NOT DOING ANYTHING WITH GUARD CONDITIONS + /* + current_entities_collection_->guard_conditions.update(new_collection.guard_conditions, + [](auto guard_condition) {(void)guard_condition;}, + [](auto guard_condition) {guard_condition->set_on_trigger_callback(nullptr);}); + */ + + current_entities_collection_->waitables.update( + new_collection.waitables, + [this](auto waitable) { + waitable->set_on_ready_callback( + this->create_waitable_callback(waitable.get())); + }, + [](auto waitable) {waitable->clear_on_ready_callback();}); +} + +std::function +EventsExecutor::create_entity_callback( + void * entity_key, ExecutorEventType event_type) +{ + std::function + callback = [this, entity_key, event_type](size_t num_events) { + ExecutorEvent event = {entity_key, nullptr, -1, event_type, num_events}; + this->events_queue_->enqueue(event); + }; + return callback; +} + +std::function +EventsExecutor::create_waitable_callback(const rclcpp::Waitable * entity_key) +{ + std::function + callback = [this, entity_key](size_t num_events, int waitable_data) { + ExecutorEvent event = + {entity_key, nullptr, waitable_data, ExecutorEventType::WAITABLE_EVENT, num_events}; + this->events_queue_->enqueue(event); + }; + return callback; +} + +void +EventsExecutor::add_notify_waitable_to_collection( + rclcpp::executors::ExecutorEntitiesCollection::WaitableCollection & collection) +{ + // The notify waitable is not associated to any group, so use an invalid one + rclcpp::CallbackGroup::WeakPtr weak_group_ptr; + collection.insert( + { + this->notify_waitable_.get(), + {this->notify_waitable_, weak_group_ptr} + }); +} diff --git a/rclcpp/src/rclcpp/experimental/timers_manager.cpp b/rclcpp/src/rclcpp/experimental/timers_manager.cpp new file mode 100644 index 0000000000..2caa0a6b15 --- /dev/null +++ b/rclcpp/src/rclcpp/experimental/timers_manager.cpp @@ -0,0 +1,329 @@ +// Copyright 2023 iRobot Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
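In the events_executor.cpp code above, every entity callback funnels into events_queue_->enqueue() and spin() drains the same queue; the queue type itself is a pluggable extension point, taken as the EventsExecutor's first constructor argument. As a rough illustration of that extension point only: a bounded queue that drops the oldest event when full might look like the sketch below. It assumes the EventsQueue virtual interface mirrors the calls made in this file (enqueue, timed dequeue, empty, size); BoundedEventsQueue is a hypothetical name, not part of this diff.

#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <deque>
#include <mutex>

#include "rclcpp/experimental/executors/events_executor/events_queue.hpp"

// Hypothetical queue: bounds memory usage by discarding the oldest event
// when the executor falls behind.
class BoundedEventsQueue : public rclcpp::experimental::executors::EventsQueue
{
public:
  explicit BoundedEventsQueue(size_t max_size)
  : max_size_(max_size) {}

  void enqueue(const rclcpp::experimental::executors::ExecutorEvent & event) override
  {
    {
      std::unique_lock<std::mutex> lock(mutex_);
      if (queue_.size() >= max_size_) {
        queue_.pop_front();  // drop the oldest event instead of growing unbounded
      }
      queue_.push_back(event);
    }
    cv_.notify_one();
  }

  bool dequeue(
    rclcpp::experimental::executors::ExecutorEvent & event,
    std::chrono::nanoseconds timeout = std::chrono::nanoseconds::max()) override
  {
    std::unique_lock<std::mutex> lock(mutex_);
    auto has_data = [this]() {return !queue_.empty();};
    if (timeout != std::chrono::nanoseconds::max()) {
      // Timed wait, as used by spin_once_impl() and the zero-timeout polls above
      if (!cv_.wait_for(lock, timeout, has_data)) {
        return false;
      }
    } else {
      // Untimed wait, as used by spin()
      cv_.wait(lock, has_data);
    }
    event = queue_.front();
    queue_.pop_front();
    return true;
  }

  bool empty() const override
  {
    std::unique_lock<std::mutex> lock(mutex_);
    return queue_.empty();
  }

  size_t size() const override
  {
    std::unique_lock<std::mutex> lock(mutex_);
    return queue_.size();
  }

private:
  size_t max_size_;
  mutable std::mutex mutex_;
  std::condition_variable cv_;
  std::deque<rclcpp::experimental::executors::ExecutorEvent> queue_;
};

If the interface holds, such a queue would be installed with EventsExecutor executor(std::make_unique<BoundedEventsQueue>(1024));.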
+ +#include "rclcpp/experimental/timers_manager.hpp" + +#include + +#include +#include +#include +#include + +#include "rcpputils/scope_exit.hpp" + +using rclcpp::experimental::TimersManager; + +TimersManager::TimersManager( + std::shared_ptr context, + std::function &)> on_ready_callback) +: on_ready_callback_(on_ready_callback), + context_(context) +{ +} + +TimersManager::~TimersManager() +{ + // Remove all timers + this->clear(); + + // Make sure timers thread is stopped before destroying this object + this->stop(); +} + +void TimersManager::add_timer(rclcpp::TimerBase::SharedPtr timer) +{ + if (!timer) { + throw std::invalid_argument("TimersManager::add_timer() trying to add nullptr timer"); + } + + bool added = false; + { + std::unique_lock lock(timers_mutex_); + added = weak_timers_heap_.add_timer(timer); + timers_updated_ = timers_updated_ || added; + } + + timer->set_on_reset_callback( + [this](size_t arg) { + { + (void)arg; + std::unique_lock lock(timers_mutex_); + timers_updated_ = true; + } + timers_cv_.notify_one(); + }); + + if (added) { + // Notify that a timer has been added + timers_cv_.notify_one(); + } +} + +void TimersManager::start() +{ + // Make sure that the thread is not already running + if (running_.exchange(true)) { + throw std::runtime_error("TimersManager::start() can't start timers thread as already running"); + } + + timers_thread_ = std::thread(&TimersManager::run_timers, this); +} + +void TimersManager::stop() +{ + // Lock stop() function to prevent race condition in destructor + std::unique_lock lock(stop_mutex_); + running_ = false; + + // Notify the timers manager thread to wake up + { + std::unique_lock lock(timers_mutex_); + timers_updated_ = true; + } + timers_cv_.notify_one(); + + // Join timers thread if it's running + if (timers_thread_.joinable()) { + timers_thread_.join(); + } +} + +std::optional TimersManager::get_head_timeout() +{ + // Do not allow to interfere with the thread running + if (running_) { + throw std::runtime_error( + "get_head_timeout() can't be used while timers thread is running"); + } + + std::unique_lock lock(timers_mutex_); + return this->get_head_timeout_unsafe(); +} + +size_t TimersManager::get_number_ready_timers() +{ + // Do not allow to interfere with the thread running + if (running_) { + throw std::runtime_error( + "get_number_ready_timers() can't be used while timers thread is running"); + } + + std::unique_lock lock(timers_mutex_); + TimersHeap locked_heap = weak_timers_heap_.validate_and_lock(); + return locked_heap.get_number_ready_timers(); +} + +bool TimersManager::execute_head_timer() +{ + // Do not allow to interfere with the thread running + if (running_) { + throw std::runtime_error( + "execute_head_timer() can't be used while timers thread is running"); + } + + std::unique_lock lock(timers_mutex_); + + TimersHeap timers_heap = weak_timers_heap_.validate_and_lock(); + + // Nothing to do if we don't have any timer + if (timers_heap.empty()) { + return false; + } + + TimerPtr head_timer = timers_heap.front(); + + const bool timer_ready = head_timer->is_ready(); + if (timer_ready) { + // NOTE: here we always execute the timer, regardless of whether the + // on_ready_callback is set or not. 
+ auto data = head_timer->call(); + if (!data) { + // someone canceled the timer between is_ready and call + return false; + } + head_timer->execute_callback(data); + timers_heap.heapify_root(); + weak_timers_heap_.store(timers_heap); + } + + return timer_ready; +} + +void TimersManager::execute_ready_timer( + const rclcpp::TimerBase * timer_id, + const std::shared_ptr & data) +{ + TimerPtr ready_timer; + { + std::unique_lock lock(timers_mutex_); + ready_timer = weak_timers_heap_.get_timer(timer_id); + } + if (ready_timer) { + ready_timer->execute_callback(data); + } +} + +std::optional TimersManager::get_head_timeout_unsafe() +{ + // If we don't have any weak pointer, then we just return maximum timeout + if (weak_timers_heap_.empty()) { + return std::chrono::nanoseconds::max(); + } + // Weak heap is not empty, so try to lock the first element. + // If it is still a valid pointer, it is guaranteed to be the correct head + TimerPtr head_timer = weak_timers_heap_.front().lock(); + + if (!head_timer) { + // The first element has expired, we can't make other assumptions on the heap + // and we need to entirely validate it. + TimersHeap locked_heap = weak_timers_heap_.validate_and_lock(); + // NOTE: the following operations will not modify any element in the heap, so we + // don't have to call `weak_timers_heap_.store(locked_heap)` at the end. + + if (locked_heap.empty()) { + return std::chrono::nanoseconds::max(); + } + head_timer = locked_heap.front(); + } + if (head_timer->is_canceled()) { + return std::nullopt; + } + return head_timer->time_until_trigger(); +} + +void TimersManager::execute_ready_timers_unsafe() +{ + // We start by locking the timers + TimersHeap locked_heap = weak_timers_heap_.validate_and_lock(); + + // Nothing to do if we don't have any timer + if (locked_heap.empty()) { + return; + } + + // Keep executing timers until they are ready and they were already ready when we started. + // The two checks prevent this function from blocking indefinitely if the + // time required for executing the timers is longer than their period. + + TimerPtr head_timer = locked_heap.front(); + const size_t number_ready_timers = locked_heap.get_number_ready_timers(); + size_t executed_timers = 0; + while (executed_timers < number_ready_timers && head_timer->is_ready()) { + auto data = head_timer->call(); + if (data) { + if (on_ready_callback_) { + on_ready_callback_(head_timer.get(), data); + } else { + head_timer->execute_callback(data); + } + } else { + // someone canceled the timer between is_ready and call + // we don't do anything, as the timer is now 'processed' + } + + executed_timers++; + // Executing a timer will result in updating its time_until_trigger, so re-heapify + locked_heap.heapify_root(); + // Get new head timer + head_timer = locked_heap.front(); + } + + // After having performed work on the locked heap we reflect the changes to weak one. + // Timers will be already sorted the next time we need them if none went out of scope. + weak_timers_heap_.store(locked_heap); +} + +void TimersManager::run_timers() +{ + // Make sure the running flag is set to false when we exit from this function + // to allow restarting the timers thread. + RCPPUTILS_SCOPE_EXIT(this->running_.store(false); ); + + while (rclcpp::ok(context_) && running_) { + // Lock mutex + std::unique_lock lock(timers_mutex_); + + std::optional time_to_sleep = get_head_timeout_unsafe(); + + // If head timer was cancelled, try to reheap and get a new head. 
+ // This avoids an edge condition where head timer is cancelled, but other + // valid timers remain in the heap. + if (!time_to_sleep.has_value()) { + // Re-heap to (possibly) move cancelled timer from head of heap. If + // entire heap is cancelled, this will still result in a nullopt. + TimersHeap locked_heap = weak_timers_heap_.validate_and_lock(); + locked_heap.heapify(); + weak_timers_heap_.store(locked_heap); + time_to_sleep = get_head_timeout_unsafe(); + } + + // If no timers, or all timers cancelled, wait for an update. + if (!time_to_sleep.has_value() || (time_to_sleep.value() == std::chrono::nanoseconds::max()) ) { + // Wait until notification that timers have been updated + timers_cv_.wait(lock, [this]() {return timers_updated_;}); + + // Re-heap in case ordering changed due to a cancelled timer + // re-activating. + TimersHeap locked_heap = weak_timers_heap_.validate_and_lock(); + locked_heap.heapify(); + weak_timers_heap_.store(locked_heap); + } else if (time_to_sleep.value() != std::chrono::nanoseconds::zero()) { + // If time_to_sleep is zero, we immediately execute. Otherwise, wait + // until timeout or notification that timers have been updated + timers_cv_.wait_for(lock, time_to_sleep.value(), [this]() {return timers_updated_;}); + } + + // Reset timers updated flag + timers_updated_ = false; + + // Execute timers + this->execute_ready_timers_unsafe(); + } +} + +void TimersManager::clear() +{ + { + // Lock mutex and then clear all data structures + std::unique_lock lock(timers_mutex_); + + TimersHeap locked_heap = weak_timers_heap_.validate_and_lock(); + locked_heap.clear_timers_on_reset_callbacks(); + + weak_timers_heap_.clear(); + + timers_updated_ = true; + } + + // Notify timers thread such that it can re-compute its timeout + timers_cv_.notify_one(); +} + +void TimersManager::remove_timer(TimerPtr timer) +{ + bool removed = false; + { + std::unique_lock lock(timers_mutex_); + removed = weak_timers_heap_.remove_timer(timer); + + timers_updated_ = timers_updated_ || removed; + } + + if (removed) { + // Notify timers thread such that it can re-compute its timeout + timers_cv_.notify_one(); + timer->clear_on_reset_callback(); + } +} diff --git a/rclcpp/src/rclcpp/guard_condition.cpp b/rclcpp/src/rclcpp/guard_condition.cpp index ea68c78d73..627644e602 100644 --- a/rclcpp/src/rclcpp/guard_condition.cpp +++ b/rclcpp/src/rclcpp/guard_condition.cpp @@ -74,16 +74,19 @@ GuardCondition::get_rcl_guard_condition() const void GuardCondition::trigger() { - std::lock_guard lock(reentrant_mutex_); + rcl_ret_t ret = rcl_trigger_guard_condition(&rcl_guard_condition_); + if (RCL_RET_OK != ret) { + rclcpp::exceptions::throw_from_rcl_error(ret); + } - if (on_trigger_callback_) { - on_trigger_callback_(1); - } else { - rcl_ret_t ret = rcl_trigger_guard_condition(&rcl_guard_condition_); - if (RCL_RET_OK != ret) { - rclcpp::exceptions::throw_from_rcl_error(ret); + { + std::lock_guard lock(reentrant_mutex_); + + if (on_trigger_callback_) { + on_trigger_callback_(1); + } else { + unread_count_++; } - unread_count_++; } } @@ -125,10 +128,9 @@ GuardCondition::set_on_trigger_callback(std::function callback) callback(unread_count_); unread_count_ = 0; } - return; + } else { + on_trigger_callback_ = nullptr; } - - on_trigger_callback_ = nullptr; } } // namespace rclcpp diff --git a/rclcpp/test/benchmark/benchmark_executor.cpp b/rclcpp/test/benchmark/benchmark_executor.cpp index 652007b589..65bb3a1007 100644 --- a/rclcpp/test/benchmark/benchmark_executor.cpp +++ 
b/rclcpp/test/benchmark/benchmark_executor.cpp @@ -362,42 +362,3 @@ BENCHMARK_F(PerformanceTestExecutorSimple, spin_until_future_complete)(benchmark } } } - -BENCHMARK_F( - PerformanceTestExecutorSimple, - static_executor_entities_collector_execute)(benchmark::State & st) -{ - rclcpp::executors::StaticExecutorEntitiesCollector::SharedPtr entities_collector_ = - std::make_shared(); - entities_collector_->add_node(node->get_node_base_interface()); - - rcl_wait_set_t wait_set = rcl_get_zero_initialized_wait_set(); - rcl_allocator_t allocator = rcl_get_default_allocator(); - auto shared_context = node->get_node_base_interface()->get_context(); - rcl_context_t * context = shared_context->get_rcl_context().get(); - rcl_ret_t ret = rcl_wait_set_init(&wait_set, 100, 100, 100, 100, 100, 100, context, allocator); - if (ret != RCL_RET_OK) { - st.SkipWithError(rcutils_get_error_string().str); - } - RCPPUTILS_SCOPE_EXIT( - { - rcl_ret_t ret = rcl_wait_set_fini(&wait_set); - if (ret != RCL_RET_OK) { - st.SkipWithError(rcutils_get_error_string().str); - } - }); - - auto memory_strategy = rclcpp::memory_strategies::create_default_strategy(); - rclcpp::GuardCondition guard_condition(shared_context); - - entities_collector_->init(&wait_set, memory_strategy); - RCPPUTILS_SCOPE_EXIT(entities_collector_->fini()); - - reset_heap_counters(); - - for (auto _ : st) { - (void)_; - std::shared_ptr data = entities_collector_->take_data(); - entities_collector_->execute(data); - } -} diff --git a/rclcpp/test/rclcpp/CMakeLists.txt b/rclcpp/test/rclcpp/CMakeLists.txt index c08ecfc826..ca00153343 100644 --- a/rclcpp/test/rclcpp/CMakeLists.txt +++ b/rclcpp/test/rclcpp/CMakeLists.txt @@ -591,6 +591,20 @@ if(TARGET test_timer) target_link_libraries(test_timer ${PROJECT_NAME} mimick) endif() +ament_add_gtest(test_reinitialized_timers test_reinitialized_timers.cpp + APPEND_LIBRARY_DIRS "${append_library_dirs}") +if(TARGET test_reinitialized_timers) + ament_target_dependencies(test_reinitialized_timers + "rcl") + target_link_libraries(test_reinitialized_timers ${PROJECT_NAME} mimick) +endif() + +ament_add_gtest(test_timers_manager test_timers_manager.cpp + APPEND_LIBRARY_DIRS "${append_library_dirs}") +if(TARGET test_timers_manager) + target_link_libraries(test_timers_manager ${PROJECT_NAME}) +endif() + ament_add_gtest(test_time_source test_time_source.cpp APPEND_LIBRARY_DIRS "${append_library_dirs}") if(TARGET test_time_source) @@ -622,18 +636,22 @@ if(TARGET test_interface_traits) target_link_libraries(test_interface_traits ${PROJECT_NAME}) endif() -# TODO(brawner) remove when destroying Node for Connext is resolved. 
See: -# https://github.com/ros2/rclcpp/issues/1250 ament_add_gtest( test_executors executors/test_executors.cpp APPEND_LIBRARY_DIRS "${append_library_dirs}" TIMEOUT 180) if(TARGET test_executors) - ament_target_dependencies(test_executors - "rcl" - "test_msgs") - target_link_libraries(test_executors ${PROJECT_NAME}) + target_link_libraries(test_executors ${PROJECT_NAME} rcl::rcl ${test_msgs_TARGETS}) +endif() + +ament_add_gtest( + test_executors_timer_cancel_behavior + executors/test_executors_timer_cancel_behavior.cpp + APPEND_LIBRARY_DIRS "${append_library_dirs}" + TIMEOUT 180) +if(TARGET test_executors) + target_link_libraries(test_executors_timer_cancel_behavior ${PROJECT_NAME} ${rosgraph_msgs_TARGETS}) endif() ament_add_gtest(test_static_single_threaded_executor executors/test_static_single_threaded_executor.cpp @@ -652,15 +670,6 @@ if(TARGET test_multi_threaded_executor) target_link_libraries(test_multi_threaded_executor ${PROJECT_NAME}) endif() -ament_add_gtest(test_static_executor_entities_collector executors/test_static_executor_entities_collector.cpp - APPEND_LIBRARY_DIRS "${append_library_dirs}" TIMEOUT 120) -if(TARGET test_static_executor_entities_collector) - ament_target_dependencies(test_static_executor_entities_collector - "rcl" - "test_msgs") - target_link_libraries(test_static_executor_entities_collector ${PROJECT_NAME} mimick) -endif() - ament_add_gtest(test_entities_collector executors/test_entities_collector.cpp APPEND_LIBRARY_DIRS "${append_library_dirs}" TIMEOUT 120) if(TARGET test_entities_collector) @@ -679,6 +688,17 @@ if(TARGET test_executor_notify_waitable) target_link_libraries(test_executor_notify_waitable ${PROJECT_NAME} mimick) endif() +ament_add_gtest(test_events_executor executors/test_events_executor.cpp TIMEOUT 5) +if(TARGET test_events_executor) + target_link_libraries(test_events_executor ${PROJECT_NAME} ${test_msgs_TARGETS}) +endif() + +ament_add_gtest(test_events_queue executors/test_events_queue.cpp + APPEND_LIBRARY_DIRS "${append_library_dirs}") +if(TARGET test_events_queue) + target_link_libraries(test_events_queue ${PROJECT_NAME}) +endif() + ament_add_gtest(test_guard_condition test_guard_condition.cpp APPEND_LIBRARY_DIRS "${append_library_dirs}") if(TARGET test_guard_condition) diff --git a/rclcpp/test/rclcpp/executors/executor_types.hpp b/rclcpp/test/rclcpp/executors/executor_types.hpp new file mode 100644 index 0000000000..0218a9b547 --- /dev/null +++ b/rclcpp/test/rclcpp/executors/executor_types.hpp @@ -0,0 +1,70 @@ +// Copyright 2017 Open Source Robotics Foundation, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef RCLCPP__EXECUTORS__EXECUTOR_TYPES_HPP_
+#define RCLCPP__EXECUTORS__EXECUTOR_TYPES_HPP_
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <type_traits>
+
+#include "rclcpp/experimental/executors/events_executor/events_executor.hpp"
+#include "rclcpp/executors/single_threaded_executor.hpp"
+#include "rclcpp/executors/static_single_threaded_executor.hpp"
+#include "rclcpp/executors/multi_threaded_executor.hpp"
+
+using ExecutorTypes =
+  ::testing::Types<
+  rclcpp::executors::SingleThreadedExecutor,
+  rclcpp::executors::MultiThreadedExecutor,
+  rclcpp::executors::StaticSingleThreadedExecutor,
+  rclcpp::experimental::executors::EventsExecutor>;
+
+class ExecutorTypeNames
+{
+public:
+  template<typename T>
+  static std::string GetName(int idx)
+  {
+    (void)idx;
+    if (std::is_same<T, rclcpp::executors::SingleThreadedExecutor>()) {
+      return "SingleThreadedExecutor";
+    }
+
+    if (std::is_same<T, rclcpp::executors::MultiThreadedExecutor>()) {
+      return "MultiThreadedExecutor";
+    }
+
+    if (std::is_same<T, rclcpp::executors::StaticSingleThreadedExecutor>()) {
+      return "StaticSingleThreadedExecutor";
+    }
+
+    if (std::is_same<T, rclcpp::experimental::executors::EventsExecutor>()) {
+      return "EventsExecutor";
+    }
+
+    return "";
+  }
+};
+
+// StaticSingleThreadedExecutor is not included in these tests for now, due to:
+// https://github.com/ros2/rclcpp/issues/1219
+using StandardExecutors =
+  ::testing::Types<
+  rclcpp::executors::SingleThreadedExecutor,
+  rclcpp::executors::MultiThreadedExecutor,
+  rclcpp::experimental::executors::EventsExecutor>;
+
+#endif  // RCLCPP__EXECUTORS__EXECUTOR_TYPES_HPP_
diff --git a/rclcpp/test/rclcpp/executors/test_events_executor.cpp b/rclcpp/test/rclcpp/executors/test_events_executor.cpp
new file mode 100644
index 0000000000..13092b7067
--- /dev/null
+++ b/rclcpp/test/rclcpp/executors/test_events_executor.cpp
@@ -0,0 +1,492 @@
+// Copyright 2023 iRobot Corporation.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
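The executor_types.hpp helper above centralizes the typed-test plumbing that test_executors.cpp previously carried inline. A minimal consumer, to show how the pieces fit together (MyExecutorTest is a hypothetical fixture name, not part of this diff):

#include <gtest/gtest.h>

#include "rclcpp/rclcpp.hpp"

#include "./executor_types.hpp"

template<typename T>
class MyExecutorTest : public ::testing::Test {};

// ExecutorTypeNames::GetName<T>() gives each instantiation a readable suffix
// instead of gtest's default numeric index.
TYPED_TEST_SUITE(MyExecutorTest, ExecutorTypes, ExecutorTypeNames);

TYPED_TEST(MyExecutorTest, construct_and_destruct)
{
  rclcpp::init(0, nullptr);
  {
    TypeParam executor;  // one of the four executor types per instantiation
    (void)executor;
  }
  rclcpp::shutdown();
}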
+ +#include + +#include +#include +#include + +#include "rclcpp/experimental/executors/events_executor/events_executor.hpp" + +#include "test_msgs/srv/empty.hpp" +#include "test_msgs/msg/empty.hpp" + +using namespace std::chrono_literals; + +using rclcpp::experimental::executors::EventsExecutor; + +class TestEventsExecutor : public ::testing::Test +{ +public: + void SetUp() + { + rclcpp::init(0, nullptr); + } + + void TearDown() + { + rclcpp::shutdown(); + } +}; + +TEST_F(TestEventsExecutor, run_pub_sub) +{ + auto node = std::make_shared("node"); + + bool msg_received = false; + auto subscription = node->create_subscription( + "topic", rclcpp::SensorDataQoS(), + [&msg_received](test_msgs::msg::Empty::ConstSharedPtr msg) + { + (void)msg; + msg_received = true; + }); + + auto publisher = node->create_publisher("topic", rclcpp::SensorDataQoS()); + + EventsExecutor executor; + executor.add_node(node); + + bool spin_exited = false; + std::thread spinner([&spin_exited, &executor]() { + executor.spin(); + spin_exited = true; + }); + + auto msg = std::make_unique(); + publisher->publish(std::move(msg)); + + // Wait some time for the subscription to receive the message + auto start = std::chrono::high_resolution_clock::now(); + while ( + !msg_received && + !spin_exited && + (std::chrono::high_resolution_clock::now() - start < 1s)) + { + std::this_thread::sleep_for(25ms); + } + + executor.cancel(); + spinner.join(); + executor.remove_node(node); + + EXPECT_TRUE(msg_received); + EXPECT_TRUE(spin_exited); +} + +TEST_F(TestEventsExecutor, run_clients_servers) +{ + auto node = std::make_shared("node"); + + bool request_received = false; + bool response_received = false; + auto service = + node->create_service( + "service", + [&request_received]( + const test_msgs::srv::Empty::Request::SharedPtr, + test_msgs::srv::Empty::Response::SharedPtr) + { + request_received = true; + }); + auto client = node->create_client("service"); + + EventsExecutor executor; + executor.add_node(node); + + bool spin_exited = false; + std::thread spinner([&spin_exited, &executor]() { + executor.spin(); + spin_exited = true; + }); + + auto request = std::make_shared(); + client->async_send_request( + request, + [&response_received](rclcpp::Client::SharedFuture result_future) { + (void)result_future; + response_received = true; + }); + + // Wait some time for the client-server to be invoked + auto start = std::chrono::steady_clock::now(); + while ( + !response_received && + !spin_exited && + (std::chrono::steady_clock::now() - start < 1s)) + { + std::this_thread::sleep_for(5ms); + } + + executor.cancel(); + spinner.join(); + executor.remove_node(node); + + EXPECT_TRUE(request_received); + EXPECT_TRUE(response_received); + EXPECT_TRUE(spin_exited); +} + +TEST_F(TestEventsExecutor, spin_once_max_duration_timeout) +{ + auto node = std::make_shared("node"); + + EventsExecutor executor; + executor.add_node(node); + + // Consume previous events so we have a fresh start + executor.spin_all(1s); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 10s, + [&]() { + t_runs++; + }); + + // This first spin_once takes care of the waitable event + // generated by the addition of the timer to the node + executor.spin_once(1s); + EXPECT_EQ(0u, t_runs); + + auto start = std::chrono::steady_clock::now(); + + // This second spin_once should take care of the timer, + executor.spin_once(10ms); + + // but doesn't spin the time enough to call the timer callback. 
+ EXPECT_EQ(0u, t_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 200ms); +} + +TEST_F(TestEventsExecutor, spin_once_max_duration_timer) +{ + auto node = std::make_shared("node"); + + EventsExecutor executor; + executor.add_node(node); + + // Consume previous events so we have a fresh start + executor.spin_all(1s); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 10ms, + [&]() { + t_runs++; + }); + + // This first spin_once takes care of the waitable event + // generated by the addition of the timer to the node + executor.spin_once(1s); + EXPECT_EQ(0u, t_runs); + + auto start = std::chrono::steady_clock::now(); + + // This second spin_once should take care of the timer + executor.spin_once(11ms); + + EXPECT_EQ(1u, t_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 200ms); +} + +TEST_F(TestEventsExecutor, spin_some_max_duration) +{ + { + auto node = std::make_shared("node"); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 10s, + [&]() { + t_runs++; + }); + + EventsExecutor executor; + executor.add_node(node); + + auto start = std::chrono::steady_clock::now(); + executor.spin_some(10ms); + + EXPECT_EQ(0u, t_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 200ms); + } + + { + auto node = std::make_shared("node"); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 10ms, + [&]() { + t_runs++; + }); + + // Sleep some time for the timer to be ready when spin + std::this_thread::sleep_for(10ms); + + EventsExecutor executor; + executor.add_node(node); + + auto start = std::chrono::steady_clock::now(); + executor.spin_some(10s); + + EXPECT_EQ(1u, t_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 200ms); + } +} + +TEST_F(TestEventsExecutor, spin_some_zero_duration) +{ + auto node = std::make_shared("node"); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 20ms, + [&]() { + t_runs++; + }); + + // Sleep some time for the timer to be ready when spin + std::this_thread::sleep_for(20ms); + + EventsExecutor executor; + executor.add_node(node); + executor.spin_some(0ms); + + EXPECT_EQ(1u, t_runs); +} + +TEST_F(TestEventsExecutor, spin_all_max_duration) +{ + { + auto node = std::make_shared("node"); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 10s, + [&]() { + t_runs++; + }); + + EventsExecutor executor; + executor.add_node(node); + + auto start = std::chrono::steady_clock::now(); + executor.spin_all(10ms); + + EXPECT_EQ(0u, t_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 200ms); + } + + { + auto node = std::make_shared("node"); + + size_t t_runs = 0; + auto t = node->create_wall_timer( + 10ms, + [&]() { + t_runs++; + }); + + // Sleep some time for the timer to be ready when spin + std::this_thread::sleep_for(10ms); + + EventsExecutor executor; + executor.add_node(node); + + auto start = std::chrono::steady_clock::now(); + executor.spin_all(10s); + + EXPECT_EQ(1u, t_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 200ms); + } + + EventsExecutor executor; + EXPECT_THROW(executor.spin_all(0ms), std::invalid_argument); + EXPECT_THROW(executor.spin_all(-5ms), std::invalid_argument); +} + +TEST_F(TestEventsExecutor, cancel_while_timers_running) +{ + auto node = std::make_shared("node"); + + EventsExecutor executor; + executor.add_node(node); + + // Take care of previous events for a fresh start + executor.spin_all(1s); + + size_t t1_runs = 0; + auto t1 = node->create_wall_timer( + 1ms, + [&]() { + t1_runs++; + std::this_thread::sleep_for(50ms); + }); + + 
size_t t2_runs = 0; + auto t2 = node->create_wall_timer( + 1ms, + [&]() { + t2_runs++; + std::this_thread::sleep_for(50ms); + }); + + + std::thread spinner([&executor]() {executor.spin();}); + + std::this_thread::sleep_for(10ms); + // Call cancel while t1 callback is still being executed + executor.cancel(); + spinner.join(); + + // Depending on the latency on the system, t2 may start to execute before cancel is signaled + EXPECT_GE(1u, t1_runs); + EXPECT_GE(1u, t2_runs); +} + +TEST_F(TestEventsExecutor, cancel_while_timers_waiting) +{ + auto node = std::make_shared("node"); + + size_t t1_runs = 0; + auto t1 = node->create_wall_timer( + 100s, + [&]() { + t1_runs++; + }); + + EventsExecutor executor; + executor.add_node(node); + + auto start = std::chrono::steady_clock::now(); + std::thread spinner([&executor]() {executor.spin();}); + + std::this_thread::sleep_for(10ms); + executor.cancel(); + spinner.join(); + + EXPECT_EQ(0u, t1_runs); + EXPECT_TRUE(std::chrono::steady_clock::now() - start < 1s); +} + +TEST_F(TestEventsExecutor, destroy_entities) +{ + // This test fails on Windows! We skip it for now + GTEST_SKIP(); + + // Create a publisher node and start publishing messages + auto node_pub = std::make_shared("node_pub"); + auto publisher = node_pub->create_publisher("topic", rclcpp::QoS(10)); + auto timer = node_pub->create_wall_timer( + 2ms, [&]() {publisher->publish(std::make_unique());}); + EventsExecutor executor_pub; + executor_pub.add_node(node_pub); + std::thread spinner([&executor_pub]() {executor_pub.spin();}); + + // Create a node with two different subscriptions to the topic + auto node_sub = std::make_shared("node_sub"); + size_t callback_count_1 = 0; + auto subscription_1 = + node_sub->create_subscription( + "topic", rclcpp::QoS(10), [&](test_msgs::msg::Empty::ConstSharedPtr) {callback_count_1++;}); + size_t callback_count_2 = 0; + auto subscription_2 = + node_sub->create_subscription( + "topic", rclcpp::QoS(10), [&](test_msgs::msg::Empty::ConstSharedPtr) {callback_count_2++;}); + EventsExecutor executor_sub; + executor_sub.add_node(node_sub); + + // Wait some time while messages are published + std::this_thread::sleep_for(10ms); + + // Destroy one of the two subscriptions + subscription_1.reset(); + + // Let subscriptions executor spin + executor_sub.spin_some(10ms); + + // The callback count of the destroyed subscription remained at 0 + EXPECT_EQ(0u, callback_count_1); + EXPECT_LT(0u, callback_count_2); + + executor_pub.cancel(); + spinner.join(); +} + +// Testing construction of a subscriptions with QoS event callback functions. 
+std::string * g_pub_log_msg; +std::string * g_sub_log_msg; +std::promise * g_log_msgs_promise; +TEST_F(TestEventsExecutor, test_default_incompatible_qos_callbacks) +{ + auto node = std::make_shared("node"); + rcutils_logging_output_handler_t original_output_handler = rcutils_logging_get_output_handler(); + + std::string pub_log_msg; + std::string sub_log_msg; + std::promise log_msgs_promise; + g_pub_log_msg = &pub_log_msg; + g_sub_log_msg = &sub_log_msg; + g_log_msgs_promise = &log_msgs_promise; + auto logger_callback = []( + const rcutils_log_location_t * /*location*/, + int /*level*/, const char * /*name*/, rcutils_time_point_value_t /*timestamp*/, + const char * format, va_list * args) -> void { + char buffer[1024]; + vsnprintf(buffer, sizeof(buffer), format, *args); + const std::string msg = buffer; + if (msg.rfind("New subscription discovered on topic '/test_topic'", 0) == 0) { + *g_pub_log_msg = buffer; + } else if (msg.rfind("New publisher discovered on topic '/test_topic'", 0) == 0) { + *g_sub_log_msg = buffer; + } + + if (!g_pub_log_msg->empty() && !g_sub_log_msg->empty()) { + g_log_msgs_promise->set_value(); + } + }; + rcutils_logging_set_output_handler(logger_callback); + + std::shared_future log_msgs_future = log_msgs_promise.get_future(); + + rclcpp::QoS qos_profile_publisher(10); + qos_profile_publisher.durability(RMW_QOS_POLICY_DURABILITY_VOLATILE); + auto publisher = node->create_publisher( + "test_topic", qos_profile_publisher); + + rclcpp::QoS qos_profile_subscription(10); + qos_profile_subscription.durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); + auto subscription = node->create_subscription( + "test_topic", qos_profile_subscription, [&](test_msgs::msg::Empty::ConstSharedPtr) {}); + + EventsExecutor ex; + ex.add_node(node->get_node_base_interface()); + + const auto timeout = std::chrono::seconds(10); + ex.spin_until_future_complete(log_msgs_future, timeout); + + EXPECT_EQ( + "New subscription discovered on topic '/test_topic', requesting incompatible QoS. " + "No messages will be sent to it. Last incompatible policy: DURABILITY_QOS_POLICY", + pub_log_msg); + EXPECT_EQ( + "New publisher discovered on topic '/test_topic', offering incompatible QoS. " + "No messages will be sent to it. Last incompatible policy: DURABILITY_QOS_POLICY", + sub_log_msg); + + rcutils_logging_set_output_handler(original_output_handler); +} diff --git a/rclcpp/test/rclcpp/executors/test_events_queue.cpp b/rclcpp/test/rclcpp/executors/test_events_queue.cpp new file mode 100644 index 0000000000..741e6ad384 --- /dev/null +++ b/rclcpp/test/rclcpp/executors/test_events_queue.cpp @@ -0,0 +1,83 @@ +// Copyright 2023 iRobot Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
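To make the queue test below easier to follow: throughout this diff an ExecutorEvent is brace-initialized with five values. The sketch below paraphrases its shape, with field names taken from how this diff accesses them; the authoritative declaration lives in events_executor_event_types.hpp, not here.

#include <cstddef>
#include <memory>

#include "rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp"

// Paraphrased shape only, not the literal upstream declaration.
struct ExecutorEventSketch
{
  const void * entity_key;     // handle identifying the entity that generated the event
  std::shared_ptr<void> data;  // timer events carry the pre-collected call data here
  int waitable_data;           // entity id forwarded to Waitable::take_data_by_entity_id()
  rclcpp::experimental::executors::ExecutorEventType type;  // SUBSCRIPTION_EVENT, TIMER_EVENT, ...
  size_t num_events;           // number of coalesced occurrences to execute
};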
+ +#include + +#include + +#include "rclcpp/experimental/executors/events_executor/events_executor_event_types.hpp" +#include "rclcpp/experimental/executors/events_executor/simple_events_queue.hpp" + +using namespace std::chrono_literals; + +TEST(TestEventsQueue, SimpleQueueTest) +{ + // Create a SimpleEventsQueue and a local queue + auto simple_queue = std::make_unique(); + rclcpp::experimental::executors::ExecutorEvent event {}; + bool ret = false; + + // Make sure the queue is empty at startup + EXPECT_TRUE(simple_queue->empty()); + EXPECT_EQ(simple_queue->size(), 0u); + + // Push 11 messages + for (uint32_t i = 1; i < 11; i++) { + rclcpp::experimental::executors::ExecutorEvent stub_event {}; + stub_event.num_events = 1; + simple_queue->enqueue(stub_event); + + EXPECT_FALSE(simple_queue->empty()); + EXPECT_EQ(simple_queue->size(), i); + } + + // Pop one message + ret = simple_queue->dequeue(event); + EXPECT_TRUE(ret); + EXPECT_FALSE(simple_queue->empty()); + EXPECT_EQ(simple_queue->size(), 9u); + + // Pop one message + ret = simple_queue->dequeue(event, std::chrono::nanoseconds(0)); + EXPECT_TRUE(ret); + EXPECT_FALSE(simple_queue->empty()); + EXPECT_EQ(simple_queue->size(), 8u); + + while (!simple_queue->empty()) { + ret = simple_queue->dequeue(event); + EXPECT_TRUE(ret); + } + + EXPECT_TRUE(simple_queue->empty()); + EXPECT_EQ(simple_queue->size(), 0u); + + ret = simple_queue->dequeue(event, std::chrono::nanoseconds(0)); + EXPECT_FALSE(ret); + + // Lets push an event into the queue and get it back + rclcpp::experimental::executors::ExecutorEvent push_event = { + simple_queue.get(), + nullptr, + 99, + rclcpp::experimental::executors::ExecutorEventType::SUBSCRIPTION_EVENT, + 1}; + + simple_queue->enqueue(push_event); + ret = simple_queue->dequeue(event); + EXPECT_TRUE(ret); + EXPECT_EQ(push_event.entity_key, event.entity_key); + EXPECT_EQ(push_event.waitable_data, event.waitable_data); + EXPECT_EQ(push_event.type, event.type); + EXPECT_EQ(push_event.num_events, event.num_events); +} diff --git a/rclcpp/test/rclcpp/executors/test_executors.cpp b/rclcpp/test/rclcpp/executors/test_executors.cpp index eb6652f19b..f5127b1725 100644 --- a/rclcpp/test/rclcpp/executors/test_executors.cpp +++ b/rclcpp/test/rclcpp/executors/test_executors.cpp @@ -15,17 +15,19 @@ /** * This test checks all implementations of rclcpp::executor to check they pass they basic API * tests. Anything specific to any executor in particular should go in a separate test file. 
- * */ + #include #include +#include #include #include #include #include #include #include +#include #include "rcl/error_handling.h" #include "rcl/time.h" @@ -34,27 +36,22 @@ #include "rclcpp/duration.hpp" #include "rclcpp/guard_condition.hpp" #include "rclcpp/rclcpp.hpp" +#include "rclcpp/time_source.hpp" #include "test_msgs/msg/empty.hpp" +#include "./executor_types.hpp" + using namespace std::chrono_literals; template class TestExecutors : public ::testing::Test { public: - static void SetUpTestCase() + void SetUp() { rclcpp::init(0, nullptr); - } - - static void TearDownTestCase() - { - rclcpp::shutdown(); - } - void SetUp() - { const auto test_info = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream test_name; test_name << test_info->test_case_name() << "_" << test_info->name(); @@ -75,6 +72,8 @@ class TestExecutors : public ::testing::Test publisher.reset(); subscription.reset(); node.reset(); + + rclcpp::shutdown(); } rclcpp::Node::SharedPtr node; @@ -83,54 +82,16 @@ class TestExecutors : public ::testing::Test int callback_count; }; -// spin_all and spin_some are not implemented correctly in StaticSingleThreadedExecutor, see: -// https://github.com/ros2/rclcpp/issues/1219 for tracking template class TestExecutorsStable : public TestExecutors {}; -using ExecutorTypes = - ::testing::Types< - rclcpp::executors::SingleThreadedExecutor, - rclcpp::executors::MultiThreadedExecutor, - rclcpp::executors::StaticSingleThreadedExecutor>; - -class ExecutorTypeNames -{ -public: - template - static std::string GetName(int idx) - { - (void)idx; - if (std::is_same()) { - return "SingleThreadedExecutor"; - } - - if (std::is_same()) { - return "MultiThreadedExecutor"; - } - - if (std::is_same()) { - return "StaticSingleThreadedExecutor"; - } - - return ""; - } -}; - -// TYPED_TEST_SUITE is deprecated as of gtest 1.9, use TYPED_TEST_SUITE when gtest dependency -// is updated. 
TYPED_TEST_SUITE(TestExecutors, ExecutorTypes, ExecutorTypeNames); -// StaticSingleThreadedExecutor is not included in these tests for now, due to: -// https://github.com/ros2/rclcpp/issues/1219 -using StandardExecutors = - ::testing::Types< - rclcpp::executors::SingleThreadedExecutor, - rclcpp::executors::MultiThreadedExecutor>; TYPED_TEST_SUITE(TestExecutorsStable, StandardExecutors, ExecutorTypeNames); // Make sure that executors detach from nodes when destructing -TYPED_TEST(TestExecutors, detachOnDestruction) { +TYPED_TEST(TestExecutors, detachOnDestruction) +{ using ExecutorType = TypeParam; { ExecutorType executor; @@ -145,7 +106,8 @@ TYPED_TEST(TestExecutors, detachOnDestruction) { // Make sure that the executor can automatically remove expired nodes correctly // Currently fails for StaticSingleThreadedExecutor so it is being skipped, see: // https://github.com/ros2/rclcpp/issues/1231 -TYPED_TEST(TestExecutorsStable, addTemporaryNode) { +TYPED_TEST(TestExecutorsStable, addTemporaryNode) +{ using ExecutorType = TypeParam; ExecutorType executor; @@ -163,8 +125,20 @@ TYPED_TEST(TestExecutorsStable, addTemporaryNode) { spinner.join(); } +// Make sure that a spinning empty executor can be cancelled +TYPED_TEST(TestExecutors, emptyExecutor) +{ + using ExecutorType = TypeParam; + ExecutorType executor; + std::thread spinner([&]() {EXPECT_NO_THROW(executor.spin());}); + std::this_thread::sleep_for(50ms); + executor.cancel(); + spinner.join(); +} + // Check executor throws properly if the same node is added a second time -TYPED_TEST(TestExecutors, addNodeTwoExecutors) { +TYPED_TEST(TestExecutors, addNodeTwoExecutors) +{ using ExecutorType = TypeParam; ExecutorType executor1; ExecutorType executor2; @@ -174,7 +148,8 @@ TYPED_TEST(TestExecutors, addNodeTwoExecutors) { } // Check simple spin example -TYPED_TEST(TestExecutors, spinWithTimer) { +TYPED_TEST(TestExecutors, spinWithTimer) +{ using ExecutorType = TypeParam; ExecutorType executor; @@ -196,19 +171,23 @@ TYPED_TEST(TestExecutors, spinWithTimer) { executor.remove_node(this->node, true); } -TYPED_TEST(TestExecutors, spinWhileAlreadySpinning) { +TYPED_TEST(TestExecutors, spinWhileAlreadySpinning) +{ using ExecutorType = TypeParam; ExecutorType executor; - executor.add_node(this->node); - bool timer_completed = false; - auto timer = this->node->create_wall_timer(1ms, [&]() {timer_completed = true;}); + std::atomic_bool timer_completed = false; + auto timer = this->node->create_wall_timer( + 1ms, [&]() { + timer_completed.store(true); + }); + executor.add_node(this->node); std::thread spinner([&]() {executor.spin();}); - // Sleep for a short time to verify executor.spin() is going, and didn't throw. + // Sleep for a short time to verify executor.spin() is going, and didn't throw. auto start = std::chrono::steady_clock::now(); - while (!timer_completed && (std::chrono::steady_clock::now() - start) < 10s) { + while (!timer_completed.load() && (std::chrono::steady_clock::now() - start) < 10s) { std::this_thread::sleep_for(1ms); } @@ -222,7 +201,8 @@ TYPED_TEST(TestExecutors, spinWhileAlreadySpinning) { } // Check executor exits immediately if future is complete. -TYPED_TEST(TestExecutors, testSpinUntilFutureComplete) { +TYPED_TEST(TestExecutors, testSpinUntilFutureComplete) +{ using ExecutorType = TypeParam; ExecutorType executor; executor.add_node(this->node); @@ -244,7 +224,8 @@ TYPED_TEST(TestExecutors, testSpinUntilFutureComplete) { } // Same test, but uses a shared future. 
-TYPED_TEST(TestExecutors, testSpinUntilSharedFutureComplete) { +TYPED_TEST(TestExecutors, testSpinUntilSharedFutureComplete) +{ using ExecutorType = TypeParam; ExecutorType executor; executor.add_node(this->node); @@ -267,7 +248,8 @@ TYPED_TEST(TestExecutors, testSpinUntilSharedFutureComplete) { } // For a longer running future that should require several iterations of spin_once -TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteNoTimeout) { +TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteNoTimeout) +{ using ExecutorType = TypeParam; ExecutorType executor; executor.add_node(this->node); @@ -313,7 +295,8 @@ TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteNoTimeout) { } // Check spin_until_future_complete timeout works as expected -TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteWithTimeout) { +TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteWithTimeout) +{ using ExecutorType = TypeParam; ExecutorType executor; executor.add_node(this->node); @@ -359,11 +342,18 @@ class TestWaitable : public rclcpp::Waitable void add_to_wait_set(rcl_wait_set_t * wait_set) override { + if (trigger_count_ > 0) { + // Keep the gc triggered until the trigger count is reduced back to zero. + // This is necessary if trigger() results in the wait set waking, but not + // executing this waitable, in which case it needs to be re-triggered. + gc_.trigger(); + } rclcpp::detail::add_guard_condition_to_rcl_wait_set(*wait_set, gc_); } void trigger() { + trigger_count_++; gc_.trigger(); } @@ -380,29 +370,54 @@ class TestWaitable : public rclcpp::Waitable return nullptr; } + std::shared_ptr + take_data_by_entity_id(size_t id) override + { + (void) id; + return nullptr; + } + void execute(std::shared_ptr & data) override { (void) data; + trigger_count_--; count_++; std::this_thread::sleep_for(3ms); } + void + set_on_ready_callback(std::function callback) override + { + auto gc_callback = [callback](size_t count) { + callback(count, 0); + }; + gc_.set_on_trigger_callback(gc_callback); + } + + void + clear_on_ready_callback() override + { + gc_.set_on_trigger_callback(nullptr); + } + size_t get_number_of_ready_guard_conditions() override {return 1;} size_t - get_count() + get_count() const { return count_; } private: - size_t count_ = 0; + std::atomic trigger_count_ = 0; + std::atomic count_ = 0; rclcpp::GuardCondition gc_; }; -TYPED_TEST(TestExecutors, spinAll) { +TYPED_TEST(TestExecutors, spinAll) +{ using ExecutorType = TypeParam; ExecutorType executor; auto waitable_interfaces = this->node->get_node_waitables_interface(); @@ -443,7 +458,8 @@ TYPED_TEST(TestExecutors, spinAll) { spinner.join(); } -TYPED_TEST(TestExecutors, spinSome) { +TYPED_TEST(TestExecutors, spinSome) +{ using ExecutorType = TypeParam; ExecutorType executor; auto waitable_interfaces = this->node->get_node_waitables_interface(); @@ -472,8 +488,9 @@ TYPED_TEST(TestExecutors, spinSome) { this->publisher->publish(test_msgs::msg::Empty()); std::this_thread::sleep_for(1ms); } - - EXPECT_EQ(1u, my_waitable->get_count()); + // The count of "execute" depends on whether the executor starts spinning before (1) or after (0) + // the first iteration of the while loop + EXPECT_LE(1u, my_waitable->get_count()); waitable_interfaces->remove_waitable(my_waitable, nullptr); EXPECT_TRUE(spin_exited); // Cancel if it hasn't exited already. 
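The TestWaitable above now exercises both dispatch paths: wait-set executors poll it through add_to_wait_set()/is_ready()/take_data(), while the events executor registers through the new set_on_ready_callback() hook; the re-trigger in add_to_wait_set() keeps the guard condition pending when a wake-up did not result in execution. A hypothetical standalone harness for such a waitable, timing-sensitive and for illustration only:

#include <cassert>
#include <chrono>
#include <memory>
#include <thread>

#include "rclcpp/rclcpp.hpp"

int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  auto node = std::make_shared<rclcpp::Node>("waitable_demo");
  auto waitable = std::make_shared<TestWaitable>();  // the test class defined above
  node->get_node_waitables_interface()->add_waitable(waitable, nullptr);

  // Any of the executor types under test could be substituted here.
  rclcpp::executors::SingleThreadedExecutor executor;
  executor.add_node(node);
  std::thread spinner([&executor]() {executor.spin();});

  waitable->trigger();  // wakes the wait set, or fires the on-ready callback
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  executor.cancel();
  spinner.join();

  assert(1u == waitable->get_count());
  rclcpp::shutdown();
  return 0;
}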
@@ -483,7 +500,8 @@ TYPED_TEST(TestExecutors, spinSome) {
 }
 
 // Check spin_node_until_future_complete with node base pointer
-TYPED_TEST(TestExecutors, testSpinNodeUntilFutureCompleteNodeBasePtr) {
+TYPED_TEST(TestExecutors, testSpinNodeUntilFutureCompleteNodeBasePtr)
+{
   using ExecutorType = TypeParam;
   ExecutorType executor;
@@ -498,7 +516,8 @@ TYPED_TEST(TestExecutors, testSpinNodeUntilFutureCompleteNodeBasePtr) {
 }
 
 // Check spin_node_until_future_complete with node pointer
-TYPED_TEST(TestExecutors, testSpinNodeUntilFutureCompleteNodePtr) {
+TYPED_TEST(TestExecutors, testSpinNodeUntilFutureCompleteNodePtr)
+{
   using ExecutorType = TypeParam;
   ExecutorType executor;
@@ -513,7 +532,8 @@ TYPED_TEST(TestExecutors, testSpinNodeUntilFutureCompleteNodePtr) {
 }
 
 // Check spin_until_future_complete can be properly interrupted.
-TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteInterrupted) {
+TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteInterrupted)
+{
   using ExecutorType = TypeParam;
   ExecutorType executor;
   executor.add_node(this->node);
@@ -555,8 +575,81 @@ TYPED_TEST(TestExecutors, testSpinUntilFutureCompleteInterrupted) {
   spinner.join();
 }
 
+// This test verifies that the add_node operation is robust with respect to race conditions.
+// It's mostly meant to prevent regressions in the events-executor, but the operation should be
+// thread-safe in all executor implementations.
+// The initial implementation of the events-executor contained a bug where the executor
+// would end up in an inconsistent state and stop processing interrupt/shutdown notifications.
+// Manually adding a node to the executor results in a) producing a notify waitable event
+// and b) refreshing the executor collections.
+// The inconsistent state would happen if the event was processed before the collections
+// had finished being refreshed: the executor would pick up the event but be unable to
+// process it.
+// This would leave the `notify_waitable_event_pushed_` flag set to true, preventing
+// additional notify waitable events from being pushed.
+// The behavior is observable only under heavy load, so this test spawns several worker
+// threads. Due to the nature of the bug, this test may still succeed even if the
+// bug is present. However, repeated runs will expose its flakiness and indicate
+// a regression.
+TYPED_TEST(TestExecutors, testRaceConditionAddNode)
+{
+  using ExecutorType = TypeParam;
+  // rmw_connextdds doesn't support events-executor
+  if (
+    std::is_same<ExecutorType, rclcpp::experimental::executors::EventsExecutor>() &&
+    std::string(rmw_get_implementation_identifier()).find("rmw_connextdds") == 0)
+  {
+    GTEST_SKIP();
+  }
+
+  // Spawn some threads to do some heavy work
+  std::atomic<bool> should_cancel = false;
+  std::vector<std::thread> stress_threads;
+  for (size_t i = 0; i < 5 * std::thread::hardware_concurrency(); i++) {
+    stress_threads.emplace_back(
+      [&should_cancel, i]() {
+        // This is just some arbitrary heavy work
+        volatile size_t total = 0;
+        for (size_t k = 0; k < 549528914167; k++) {
+          if (should_cancel) {
+            break;
+          }
+          total += k * (i + 42);
+          (void)total;
+        }
+      });
+  }
+
+  // Create an executor
+  auto executor = std::make_shared<ExecutorType>();
+  // Start spinning
+  auto executor_thread = std::thread(
+    [executor]() {
+      executor->spin();
+    });
+  // Add a node to the executor
+  executor->add_node(this->node);
+
+  // Cancel the executor (make sure that it's already spinning first)
+  while (!executor->is_spinning() && rclcpp::ok()) {
+    continue;
+  }
+  executor->cancel();
+
+  // Try to join the thread after cancelling the executor.
+  // This is the "test".
We want to make sure that we can still cancel the executor + // regardless of the presence of race conditions + executor_thread.join(); + + // The test is now completed: we can join the stress threads + should_cancel = true; + for (auto & t : stress_threads) { + t.join(); + } +} + // Check spin_until_future_complete with node base pointer (instantiates its own executor) -TEST(TestExecutors, testSpinUntilFutureCompleteNodeBasePtr) { +TEST(TestExecutors, testSpinUntilFutureCompleteNodeBasePtr) +{ rclcpp::init(0, nullptr); { @@ -576,7 +669,8 @@ TEST(TestExecutors, testSpinUntilFutureCompleteNodeBasePtr) { } // Check spin_until_future_complete with node pointer (instantiates its own executor) -TEST(TestExecutors, testSpinUntilFutureCompleteNodePtr) { +TEST(TestExecutors, testSpinUntilFutureCompleteNodePtr) +{ rclcpp::init(0, nullptr); { @@ -593,106 +687,3 @@ TEST(TestExecutors, testSpinUntilFutureCompleteNodePtr) { rclcpp::shutdown(); } - -template -class TestIntraprocessExecutors : public ::testing::Test -{ -public: - static void SetUpTestCase() - { - rclcpp::init(0, nullptr); - } - - static void TearDownTestCase() - { - rclcpp::shutdown(); - } - - void SetUp() - { - const auto test_info = ::testing::UnitTest::GetInstance()->current_test_info(); - std::stringstream test_name; - test_name << test_info->test_case_name() << "_" << test_info->name(); - node = std::make_shared("node", test_name.str()); - - callback_count = 0; - - const std::string topic_name = std::string("topic_") + test_name.str(); - - rclcpp::PublisherOptions po; - po.use_intra_process_comm = rclcpp::IntraProcessSetting::Enable; - publisher = node->create_publisher(topic_name, rclcpp::QoS(1), po); - - auto callback = [this](test_msgs::msg::Empty::ConstSharedPtr) { - this->callback_count.fetch_add(1); - }; - - rclcpp::SubscriptionOptions so; - so.use_intra_process_comm = rclcpp::IntraProcessSetting::Enable; - subscription = - node->create_subscription( - topic_name, rclcpp::QoS(kNumMessages), std::move(callback), so); - } - - void TearDown() - { - publisher.reset(); - subscription.reset(); - node.reset(); - } - - const size_t kNumMessages = 100; - - rclcpp::Node::SharedPtr node; - rclcpp::Publisher::SharedPtr publisher; - rclcpp::Subscription::SharedPtr subscription; - std::atomic_int callback_count; -}; - -TYPED_TEST_SUITE(TestIntraprocessExecutors, ExecutorTypes, ExecutorTypeNames); - -TYPED_TEST(TestIntraprocessExecutors, testIntraprocessRetrigger) { - // This tests that executors will continue to service intraprocess subscriptions in the case - // that publishers aren't continuing to publish. - // This was previously broken in that intraprocess guard conditions were only triggered on - // publish and the test was added to prevent future regressions. - const size_t kNumMessages = 100; - - using ExecutorType = TypeParam; - ExecutorType executor; - executor.add_node(this->node); - - EXPECT_EQ(0, this->callback_count.load()); - this->publisher->publish(test_msgs::msg::Empty()); - - // Wait for up to 5 seconds for the first message to come available. 
-  const std::chrono::milliseconds sleep_per_loop(10);
-  int loops = 0;
-  while (1u != this->callback_count.load() && loops < 500) {
-    rclcpp::sleep_for(sleep_per_loop);
-    executor.spin_some();
-    loops++;
-  }
-  EXPECT_EQ(1u, this->callback_count.load());
-
-  // reset counter
-  this->callback_count.store(0);
-
-  for (size_t ii = 0; ii < kNumMessages; ++ii) {
-    this->publisher->publish(test_msgs::msg::Empty());
-  }
-
-  // Fire a timer every 10ms up to 5 seconds waiting for subscriptions to be read.
-  loops = 0;
-  auto timer = this->node->create_wall_timer(
-    std::chrono::milliseconds(10), [this, &executor, &loops, &kNumMessages]() {
-      loops++;
-      if (kNumMessages == this->callback_count.load() ||
-        loops == 500)
-      {
-        executor.cancel();
-      }
-    });
-  executor.spin();
-  EXPECT_EQ(kNumMessages, this->callback_count.load());
-}
diff --git a/rclcpp/test/rclcpp/executors/test_executors_timer_cancel_behavior.cpp b/rclcpp/test/rclcpp/executors/test_executors_timer_cancel_behavior.cpp
new file mode 100644
index 0000000000..ecee459a19
--- /dev/null
+++ b/rclcpp/test/rclcpp/executors/test_executors_timer_cancel_behavior.cpp
@@ -0,0 +1,408 @@
+// Copyright 2024 Open Source Robotics Foundation, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+
+#include "rclcpp/node.hpp"
+#include "rclcpp/parameter_client.hpp"
+#include "rclcpp/utilities.hpp"
+
+#include "rosgraph_msgs/msg/clock.hpp"
+
+#include "./executor_types.hpp"
+
+using namespace std::chrono_literals;
+
+class TimerNode : public rclcpp::Node
+{
+public:
+  explicit TimerNode(std::string subname)
+  : Node("timer_node", subname)
+  {
+    timer1_ = rclcpp::create_timer(
+      this->get_node_base_interface(), get_node_timers_interface(),
+      get_clock(), 1ms,
+      std::bind(&TimerNode::Timer1Callback, this));
+
+    timer2_ =
+      rclcpp::create_timer(
+      this->get_node_base_interface(), get_node_timers_interface(),
+      get_clock(), 1ms,
+      std::bind(&TimerNode::Timer2Callback, this));
+  }
+
+  int GetTimer1Cnt() {return cnt1_;}
+  int GetTimer2Cnt() {return cnt2_;}
+
+  void ResetTimer1()
+  {
+    timer1_->reset();
+  }
+
+  void ResetTimer2()
+  {
+    timer2_->reset();
+  }
+
+  void CancelTimer1()
+  {
+    RCLCPP_DEBUG(this->get_logger(), "Timer 1 cancelling!");
+    timer1_->cancel();
+  }
+
+  void CancelTimer2()
+  {
+    RCLCPP_DEBUG(this->get_logger(), "Timer 2 cancelling!");
+    timer2_->cancel();
+  }
+
+private:
+  void Timer1Callback()
+  {
+    RCLCPP_DEBUG(this->get_logger(), "Timer 1!");
+    cnt1_++;
+  }
+
+  void Timer2Callback()
+  {
+    RCLCPP_DEBUG(this->get_logger(), "Timer 2!");
+    cnt2_++;
+  }
+
+  rclcpp::TimerBase::SharedPtr timer1_;
+  rclcpp::TimerBase::SharedPtr timer2_;
+  int cnt1_ = 0;
+  int cnt2_ = 0;
+};
+
+// Sets up a separate thread to publish /clock messages.
+// Clock rate relative to real clock is controlled by realtime_update_rate.
+// This is set conservatively slow to ensure unit tests are reliable on Windows
+// environments, where timing performance is subpar.
+//
+// Use `sleep_for` in tests to advance the clock. The clock should run and be
+// published continuously in a separate thread to ensure correct behavior in
+// the node under test.
+class ClockPublisher : public rclcpp::Node
+{
+public:
+  explicit ClockPublisher(float simulated_clock_step = .001f, float realtime_update_rate = 0.25f)
+  : Node("clock_publisher"),
+    ros_update_duration_(0, 0),
+    realtime_clock_step_(0, 0),
+    rostime_(0, 0)
+  {
+    clock_publisher_ = this->create_publisher<rosgraph_msgs::msg::Clock>("clock", 10);
+    realtime_clock_step_ =
+      rclcpp::Duration::from_seconds(simulated_clock_step / realtime_update_rate);
+    ros_update_duration_ = rclcpp::Duration::from_seconds(simulated_clock_step);
+
+    timer_thread_ = std::thread(&ClockPublisher::RunTimer, this);
+  }
+
+  ~ClockPublisher()
+  {
+    running_ = false;
+    if (timer_thread_.joinable()) {
+      timer_thread_.join();
+    }
+  }
+
+  void sleep_for(rclcpp::Duration duration)
+  {
+    rclcpp::Time start_time(0, 0, RCL_ROS_TIME);
+    {
+      const std::lock_guard<std::mutex> lock(mutex_);
+      start_time = rostime_;
+    }
+    rclcpp::Time current_time = start_time;
+
+    while (true) {
+      {
+        const std::lock_guard<std::mutex> lock(mutex_);
+        current_time = rostime_;
+      }
+      if ((current_time - start_time) >= duration) {
+        return;
+      }
+      std::this_thread::sleep_for(realtime_clock_step_.to_chrono<std::chrono::nanoseconds>());
+      {
+        // rostime_ is shared with the publisher thread, so this update must
+        // also hold the mutex (see the comment on rostime_ below).
+        const std::lock_guard<std::mutex> lock(mutex_);
+        rostime_ += ros_update_duration_;
+      }
+    }
+  }
+
+private:
+  void RunTimer()
+  {
+    while (running_) {
+      PublishClock();
+      std::this_thread::sleep_for(realtime_clock_step_.to_chrono<std::chrono::nanoseconds>());
+    }
+  }
+
+  void PublishClock()
+  {
+    const std::lock_guard<std::mutex> lock(mutex_);
+    auto message = rosgraph_msgs::msg::Clock();
+    message.clock = rostime_;
+    clock_publisher_->publish(message);
+  }
+
+  rclcpp::Publisher<rosgraph_msgs::msg::Clock>::SharedPtr clock_publisher_;
+
+  rclcpp::Duration ros_update_duration_;
+  rclcpp::Duration realtime_clock_step_;
+  // Rostime must be guarded by a mutex, since it is accessed from the
+  // publisher thread as well as from sleep_for
+  rclcpp::Time rostime_;
+  std::mutex mutex_;
+  std::thread timer_thread_;
+  std::atomic<bool> running_ = true;
+};
+
+
+template<typename T>
+class TestTimerCancelBehavior : public ::testing::Test
+{
+public:
+  static void SetUpTestCase()
+  {
+    rclcpp::init(0, nullptr);
+  }
+
+  static void TearDownTestCase()
+  {
+    rclcpp::shutdown();
+  }
+
+  void SetUp()
+  {
+    const auto test_info = ::testing::UnitTest::GetInstance()->current_test_info();
+    std::stringstream test_name;
+    test_name << test_info->test_case_name() << "_" << test_info->name();
+    node = std::make_shared<TimerNode>(test_name.str());
+    param_client = std::make_shared<rclcpp::SyncParametersClient>(node);
+    ASSERT_TRUE(param_client->wait_for_service(5s));
+
+    auto set_parameters_results = param_client->set_parameters(
+      {rclcpp::Parameter("use_sim_time", false)});
+    for (auto & result : set_parameters_results) {
+      ASSERT_TRUE(result.successful);
+    }
+
+    // Run standalone thread to publish clock time
+    sim_clock_node = std::make_shared<ClockPublisher>();
+
+    // Spin the executor in a standalone thread
+    executor.add_node(this->node);
+    standalone_thread = std::thread(
+      [this]() {
+        executor.spin();
+      });
+  }
+
+  void TearDown()
+  {
+    node.reset();
+
+    // Clean up thread object
+    if (standalone_thread.joinable()) {
+      standalone_thread.join();
+    }
+  }
+
+  std::shared_ptr<TimerNode> node;
+  std::shared_ptr<ClockPublisher> sim_clock_node;
+  rclcpp::SyncParametersClient::SharedPtr param_client;
+  std::thread standalone_thread;
+  T executor;
+};
+
+TYPED_TEST_SUITE(TestTimerCancelBehavior, ExecutorTypes, ExecutorTypeNames);
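// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] ClockPublisher above drives ROS
// time from a background thread while tests block in sleep_for() until enough
// simulated time has elapsed. The core idea, stripped of rclcpp (all names
// here are illustrative only): one thread advances a shared clock at a fixed
// rate, and waiters poll it under a mutex.

#include <chrono>
#include <mutex>
#include <thread>

class FakeClock
{
public:
  // Called periodically by the clock-driver thread.
  void advance(std::chrono::nanoseconds step)
  {
    std::lock_guard<std::mutex> lock(mutex_);
    now_ += step;
  }

  // Called by tests: returns only after `duration` of *simulated* time.
  void sleep_for(std::chrono::nanoseconds duration)
  {
    const auto start = now();
    while (now() - start < duration) {
      // Real-time pause between polls keeps the loop cheap.
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
  }

private:
  std::chrono::nanoseconds now()
  {
    std::lock_guard<std::mutex> lock(mutex_);
    return now_;
  }

  std::chrono::nanoseconds now_{0};
  std::mutex mutex_;
};
// ---------------------------------------------------------------------------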
+
+TYPED_TEST(TestTimerCancelBehavior, testTimer1CancelledWithExecutorSpin) {
+  // Validate that cancelling one timer yields no change in behavior for other
+  // timers. Specifically, this tests the behavior when using spin() to run the
+  // executor, which is the most common use case.
+
+  // Cancel to stop the spin after some time.
+  this->sim_clock_node->sleep_for(50ms);
+  this->node->CancelTimer1();
+  this->sim_clock_node->sleep_for(150ms);
+  this->executor.cancel();
+
+  int t1_runs = this->node->GetTimer1Cnt();
+  int t2_runs = this->node->GetTimer2Cnt();
+  EXPECT_NE(t1_runs, t2_runs);
+  // Check that t2 has significantly more calls
+  EXPECT_LT(t1_runs + 50, t2_runs);
+}
+
+TYPED_TEST(TestTimerCancelBehavior, testTimer2CancelledWithExecutorSpin) {
+  // Validate that cancelling one timer yields no change in behavior for other
+  // timers. Specifically, this tests the behavior when using spin() to run the
+  // executor, which is the most common use case.
+
+  // Cancel to stop the spin after some time.
+  this->sim_clock_node->sleep_for(50ms);
+  this->node->CancelTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  this->executor.cancel();
+
+  int t1_runs = this->node->GetTimer1Cnt();
+  int t2_runs = this->node->GetTimer2Cnt();
+  EXPECT_NE(t1_runs, t2_runs);
+  // Check that t1 has significantly more calls
+  EXPECT_LT(t2_runs + 50, t1_runs);
+}
+
+TYPED_TEST(TestTimerCancelBehavior, testHeadTimerCancelThenResetBehavior) {
+  // Validate that cancelling timer doesn't affect operation of other timers,
+  // and that the cancelled timer starts executing normally once reset manually.
+
+  // Cancel to stop the spin after some time.
+  this->sim_clock_node->sleep_for(50ms);
+  this->node->CancelTimer1();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_initial = this->node->GetTimer1Cnt();
+  int t2_runs_initial = this->node->GetTimer2Cnt();
+
+  // Manually reset timer 1, then sleep again.
+  // Counts should update.
+  this->node->ResetTimer1();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_final = this->node->GetTimer1Cnt();
+  int t2_runs_final = this->node->GetTimer2Cnt();
+
+  this->executor.cancel();
+
+  // T1 should have been restarted and executed many additional times during the
+  // second sleep. Require at least 50 more runs than the initial count, which
+  // leaves a wide margin for timing jitter.
+  EXPECT_LT(t1_runs_initial + 50, t1_runs_final);
+
+  EXPECT_LT(t1_runs_initial + 50, t2_runs_initial);
+  // Check that t2 has significantly more calls, and keeps getting called.
+  EXPECT_LT(t2_runs_initial + 50, t2_runs_final);
+}
+
+TYPED_TEST(TestTimerCancelBehavior, testBackTimerCancelThenResetBehavior) {
+  // Validate that cancelling timer doesn't affect operation of other timers,
+  // and that the cancelled timer starts executing normally once reset manually.
+
+  // Cancel to stop the spin after some time.
+  this->sim_clock_node->sleep_for(50ms);
+  this->node->CancelTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_initial = this->node->GetTimer1Cnt();
+  int t2_runs_initial = this->node->GetTimer2Cnt();
+
+  // Manually reset timer 2, then sleep again.
+  // Counts should update.
+  this->node->ResetTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_final = this->node->GetTimer1Cnt();
+  int t2_runs_final = this->node->GetTimer2Cnt();
+
+  this->executor.cancel();
+
+  // T2 should have been restarted and executed many additional times during the
+  // second sleep. Require at least 50 more runs than the initial count, which
+  // leaves a wide margin for timing jitter.
+  EXPECT_LT(t2_runs_initial + 50, t2_runs_final);
+
+  EXPECT_LT(t2_runs_initial + 50, t1_runs_initial);
+  // Check that t1 has significantly more calls, and keeps getting called.
+  EXPECT_LT(t1_runs_initial + 50, t1_runs_final);
+}
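// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] The "+ 50" margins above encode
// "ran a lot" rather than an exact count: each observation window is 150ms of
// simulated time while the timers fire every 1ms, so a healthy timer is
// expected to tick on the order of 150 times per window (more in real time,
// since the simulated clock advances at 0.25x real speed). Requiring only 50
// extra runs leaves generous headroom for scheduling jitter. The bound,
// spelled out with the values from this file:

#include <chrono>

constexpr auto kTimerPeriod = std::chrono::milliseconds(1);
constexpr auto kWindow = std::chrono::milliseconds(150);
constexpr auto kExpectedTicks = kWindow / kTimerPeriod;  // == 150
constexpr int kRequiredExtraRuns = 50;  // what the EXPECT_LT checks demand
static_assert(kRequiredExtraRuns < kExpectedTicks, "margin stays conservative");
// ---------------------------------------------------------------------------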
+
+TYPED_TEST(TestTimerCancelBehavior, testBothTimerCancelThenResetT1Behavior) {
+  // Validate behavior from cancelling 2 timers, then only re-enabling one of them.
+  // Ensure that only the reset timer is executed.
+
+  // Cancel to stop the spin after some time.
+  this->sim_clock_node->sleep_for(50ms);
+  this->node->CancelTimer1();
+  this->node->CancelTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_initial = this->node->GetTimer1Cnt();
+  int t2_runs_initial = this->node->GetTimer2Cnt();
+
+  // Manually reset timer 1, then sleep again.
+  // Only timer 1's count should update.
+  this->node->ResetTimer1();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_intermediate = this->node->GetTimer1Cnt();
+  int t2_runs_intermediate = this->node->GetTimer2Cnt();
+
+  this->node->ResetTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_final = this->node->GetTimer1Cnt();
+  int t2_runs_final = this->node->GetTimer2Cnt();
+
+  this->executor.cancel();
+
+  // T1 and T2 should have the same initial count.
+  EXPECT_LE(std::abs(t1_runs_initial - t2_runs_initial), 1);
+
+  // While only T1 is reset, T2 must not run at all, and T1 should accumulate
+  // many additional calls. Require at least 50, leaving margin for jitter.
+  EXPECT_EQ(t2_runs_initial, t2_runs_intermediate);
+  EXPECT_LT(t1_runs_initial + 50, t1_runs_intermediate);
+
+  // Expect that by end of test, both are running properly again.
+  EXPECT_LT(t1_runs_intermediate + 50, t1_runs_final);
+  EXPECT_LT(t2_runs_intermediate + 50, t2_runs_final);
+}
+
+TYPED_TEST(TestTimerCancelBehavior, testBothTimerCancelThenResetT2Behavior) {
+  // Validate behavior from cancelling 2 timers, then only re-enabling one of them.
+  // Ensure that only the reset timer is executed.
+
+  // Cancel to stop the spin after some time.
+  this->sim_clock_node->sleep_for(50ms);
+  this->node->CancelTimer1();
+  this->node->CancelTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_initial = this->node->GetTimer1Cnt();
+  int t2_runs_initial = this->node->GetTimer2Cnt();
+
+  // Manually reset timer 2, then sleep again.
+  // Only timer 2's count should update.
+  this->node->ResetTimer2();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_intermediate = this->node->GetTimer1Cnt();
+  int t2_runs_intermediate = this->node->GetTimer2Cnt();
+
+  this->node->ResetTimer1();
+  this->sim_clock_node->sleep_for(150ms);
+  int t1_runs_final = this->node->GetTimer1Cnt();
+  int t2_runs_final = this->node->GetTimer2Cnt();
+
+  this->executor.cancel();
+
+  // T1 and T2 should have the same initial count.
+  EXPECT_LE(std::abs(t1_runs_initial - t2_runs_initial), 1);
+
+  // While only T2 is reset, T1 must not run at all, and T2 should accumulate
+  // many additional calls. Require at least 50, leaving margin for jitter.
+  EXPECT_EQ(t1_runs_initial, t1_runs_intermediate);
+  EXPECT_LT(t2_runs_initial + 50, t2_runs_intermediate);
+
+  // Expect that by end of test, both are running properly again.
+ EXPECT_LT(t1_runs_intermediate + 50, t1_runs_final); + EXPECT_LT(t2_runs_intermediate + 50, t2_runs_final); +} diff --git a/rclcpp/test/rclcpp/executors/test_static_single_threaded_executor.cpp b/rclcpp/test/rclcpp/executors/test_static_single_threaded_executor.cpp index 5ca6c1c25a..1a0f3f88c4 100644 --- a/rclcpp/test/rclcpp/executors/test_static_single_threaded_executor.cpp +++ b/rclcpp/test/rclcpp/executors/test_static_single_threaded_executor.cpp @@ -56,7 +56,7 @@ TEST_F(TestStaticSingleThreadedExecutor, add_callback_group_trigger_guard_failed "lib:rclcpp", rcl_trigger_guard_condition, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( executor.add_callback_group(cb_group, node->get_node_base_interface(), true), - std::runtime_error("error not set")); + std::runtime_error("Failed to trigger guard condition on callback group add: error not set")); } } @@ -69,7 +69,7 @@ TEST_F(TestStaticSingleThreadedExecutor, add_node_trigger_guard_failed) { "lib:rclcpp", rcl_trigger_guard_condition, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( executor.add_node(node), - std::runtime_error("error not set")); + std::runtime_error("Failed to trigger guard condition on node add: error not set")); } } @@ -86,7 +86,8 @@ TEST_F(TestStaticSingleThreadedExecutor, remove_callback_group_trigger_guard_fai "lib:rclcpp", rcl_trigger_guard_condition, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( executor.remove_callback_group(cb_group, true), - std::runtime_error("error not set")); + std::runtime_error( + "Failed to trigger guard condition on callback group remove: error not set")); } } @@ -99,7 +100,7 @@ TEST_F(TestStaticSingleThreadedExecutor, remove_node_failed) { "lib:rclcpp", rcl_trigger_guard_condition, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( executor.remove_node(node, true), - std::runtime_error("Node needs to be associated with this executor.")); + std::runtime_error("Node '/ns/node' needs to be associated with an executor.")); } } @@ -114,7 +115,7 @@ TEST_F(TestStaticSingleThreadedExecutor, remove_node_trigger_guard_failed) { "lib:rclcpp", rcl_trigger_guard_condition, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( executor.remove_node(node, true), - std::runtime_error("error not set")); + std::runtime_error("Failed to trigger guard condition on node remove: error not set")); } } diff --git a/rclcpp/test/rclcpp/node_interfaces/test_node_timers.cpp b/rclcpp/test/rclcpp/node_interfaces/test_node_timers.cpp index d038a4b44d..194636f831 100644 --- a/rclcpp/test/rclcpp/node_interfaces/test_node_timers.cpp +++ b/rclcpp/test/rclcpp/node_interfaces/test_node_timers.cpp @@ -32,8 +32,8 @@ class TestTimer : public rclcpp::TimerBase : TimerBase(node->get_clock(), std::chrono::nanoseconds(1), node->get_node_base_interface()->get_context()) {} - bool call() override {return true;} - void execute_callback() override {} + std::shared_ptr call() override {return nullptr;} + void execute_callback(const std::shared_ptr &) override {} bool is_steady() override {return false;} }; diff --git a/rclcpp/test/rclcpp/test_add_callback_groups_to_executor.cpp b/rclcpp/test/rclcpp/test_add_callback_groups_to_executor.cpp index 02fa0b7a94..ec4df4476c 100644 --- a/rclcpp/test/rclcpp/test_add_callback_groups_to_executor.cpp +++ b/rclcpp/test/rclcpp/test_add_callback_groups_to_executor.cpp @@ -177,21 +177,31 @@ TYPED_TEST(TestAddCallbackGroupsToExecutor, add_duplicate_callback_groups) TYPED_TEST(TestAddCallbackGroupsToExecutor, add_callback_groups_after_add_node_to_executor) { rclcpp::executors::MultiThreadedExecutor executor; + + auto count_callback_groups_in_node = 
[](auto node) { + size_t num = 0; + node->get_node_base_interface()->for_each_callback_group( + [&num](auto) { + num++; + }); + return num; + }; + auto node = std::make_shared("my_node", "/ns"); executor.add_node(node->get_node_base_interface()); - ASSERT_EQ(executor.get_all_callback_groups().size(), 1u); - std::atomic_int timer_count {0}; + ASSERT_EQ(executor.get_all_callback_groups().size(), count_callback_groups_in_node(node)); + std::atomic_size_t timer_count {0}; auto timer_callback = [&executor, &timer_count]() { - if (timer_count > 0) { - ASSERT_EQ(executor.get_all_callback_groups().size(), 3u); + auto cur_timer_count = timer_count++; + printf("in timer_callback(%zu)\n", cur_timer_count); + if (cur_timer_count > 0) { executor.cancel(); } - timer_count++; }; rclcpp::CallbackGroup::SharedPtr cb_grp = node->create_callback_group( rclcpp::CallbackGroupType::MutuallyExclusive); rclcpp::TimerBase::SharedPtr timer_ = node->create_wall_timer( - 2s, timer_callback, cb_grp); + 1s, timer_callback, cb_grp); rclcpp::CallbackGroup::SharedPtr cb_grp2 = node->create_callback_group( rclcpp::CallbackGroupType::MutuallyExclusive, false); auto timer2_callback = []() {}; @@ -203,6 +213,7 @@ TYPED_TEST(TestAddCallbackGroupsToExecutor, add_callback_groups_after_add_node_t rclcpp::TimerBase::SharedPtr timer3_ = node->create_wall_timer( 2s, timer3_callback, cb_grp3); executor.spin(); + ASSERT_GT(timer_count.load(), 0u); } /* diff --git a/rclcpp/test/rclcpp/test_executor.cpp b/rclcpp/test/rclcpp/test_executor.cpp index bdbb0a1079..5ee9573c3f 100644 --- a/rclcpp/test/rclcpp/test_executor.cpp +++ b/rclcpp/test/rclcpp/test_executor.cpp @@ -46,23 +46,6 @@ class DummyExecutor : public rclcpp::Executor { spin_node_once_nanoseconds(node, std::chrono::milliseconds(100)); } - - rclcpp::memory_strategy::MemoryStrategy * memory_strategy_ptr() - { - return memory_strategy_.get(); - } - - rclcpp::node_interfaces::NodeBaseInterface::SharedPtr local_get_node_by_group( - rclcpp::CallbackGroup::SharedPtr group) - { - std::lock_guard guard_{mutex_}; // only to make the TSA happy - return get_node_by_group(weak_groups_to_nodes_, group); - } - - rclcpp::CallbackGroup::SharedPtr local_get_group_by_timer(rclcpp::TimerBase::SharedPtr timer) - { - return get_group_by_timer(timer); - } }; class TestExecutor : public ::testing::Test @@ -130,7 +113,7 @@ TEST_F(TestExecutor, constructor_bad_wait_set_init) { auto mock = mocking_utils::patch_and_return("lib:rclcpp", rcl_wait_set_init, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( static_cast(std::make_unique()), - std::runtime_error("Failed to create wait set in Executor constructor: error not set")); + std::runtime_error("Failed to create wait set: error not set")); } TEST_F(TestExecutor, add_callback_group_twice) { @@ -142,7 +125,7 @@ TEST_F(TestExecutor, add_callback_group_twice) { cb_group->get_associated_with_executor_atomic().exchange(false); RCLCPP_EXPECT_THROW_EQ( dummy.add_callback_group(cb_group, node->get_node_base_interface(), false), - std::runtime_error("Callback group was already added to executor.")); + std::runtime_error("Callback group has already been added to this executor.")); } TEST_F(TestExecutor, add_callback_group_failed_trigger_guard_condition) { @@ -168,9 +151,15 @@ TEST_F(TestExecutor, remove_callback_group_null_node) { node.reset(); + + /** + * TODO(mjcarroll): Assert this when we are enforcing that nodes must be destroyed + * after their created callback groups. 
RCLCPP_EXPECT_THROW_EQ( dummy.remove_callback_group(cb_group, false), std::runtime_error("Node must not be deleted before its callback group(s).")); + */ + EXPECT_NO_THROW(dummy.remove_callback_group(cb_group, false)); } TEST_F(TestExecutor, remove_callback_group_failed_trigger_guard_condition) { @@ -197,7 +186,7 @@ TEST_F(TestExecutor, remove_node_not_associated) { RCLCPP_EXPECT_THROW_EQ( dummy.remove_node(node->get_node_base_interface(), false), - std::runtime_error("Node needs to be associated with an executor.")); + std::runtime_error("Node '/ns/node' needs to be associated with an executor.")); } TEST_F(TestExecutor, remove_node_associated_with_different_executor) { @@ -211,7 +200,7 @@ TEST_F(TestExecutor, remove_node_associated_with_different_executor) { RCLCPP_EXPECT_THROW_EQ( dummy2.remove_node(node1->get_node_base_interface(), false), - std::runtime_error("Node needs to be associated with this executor.")); + std::runtime_error("Node '/ns/node1' needs to be associated with this executor.")); } TEST_F(TestExecutor, spin_node_once_nanoseconds) { @@ -328,42 +317,14 @@ TEST_F(TestExecutor, cancel_failed_trigger_guard_condition) { std::runtime_error("Failed to trigger guard condition in cancel: error not set")); } -TEST_F(TestExecutor, set_memory_strategy_nullptr) { - DummyExecutor dummy; - - RCLCPP_EXPECT_THROW_EQ( - dummy.set_memory_strategy(nullptr), - std::runtime_error("Received NULL memory strategy in executor.")); -} - -TEST_F(TestExecutor, set_memory_strategy) { - DummyExecutor dummy; - rclcpp::memory_strategy::MemoryStrategy::SharedPtr strategy = - std::make_shared< - rclcpp::memory_strategies::allocator_memory_strategy::AllocatorMemoryStrategy<>>(); - - dummy.set_memory_strategy(strategy); - EXPECT_EQ(dummy.memory_strategy_ptr(), strategy.get()); -} - -TEST_F(TestExecutor, spin_once_failed_trigger_guard_condition) { - DummyExecutor dummy; - auto node = std::make_shared("node", "ns"); - auto timer = - node->create_wall_timer(std::chrono::milliseconds(1), [&]() {}); - - dummy.add_node(node); - // Wait for the wall timer to have expired. 
- std::this_thread::sleep_for(std::chrono::milliseconds(50)); - auto mock = mocking_utils::patch_and_return( - "lib:rclcpp", rcl_trigger_guard_condition, RCL_RET_ERROR); +TEST_F(TestExecutor, create_executor_fail_wait_set_clear) { + auto mock = mocking_utils::patch_and_return("lib:rclcpp", rcl_wait_set_clear, RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( - dummy.spin_once(std::chrono::milliseconds(1)), - std::runtime_error( - "Failed to trigger guard condition from execute_any_executable: error not set")); + DummyExecutor dummy, + std::runtime_error("Couldn't clear the wait set: error not set")); } -TEST_F(TestExecutor, spin_some_fail_wait_set_clear) { +TEST_F(TestExecutor, spin_all_fail_wait_set_clear) { DummyExecutor dummy; auto node = std::make_shared("node", "ns"); auto timer = @@ -371,9 +332,10 @@ TEST_F(TestExecutor, spin_some_fail_wait_set_clear) { dummy.add_node(node); auto mock = mocking_utils::patch_and_return("lib:rclcpp", rcl_wait_set_clear, RCL_RET_ERROR); + RCLCPP_EXPECT_THROW_EQ( - dummy.spin_some(std::chrono::milliseconds(1)), - std::runtime_error("Couldn't clear wait set: error not set")); + dummy.spin_all(std::chrono::milliseconds(1)), + std::runtime_error("Couldn't clear the wait set: error not set")); } TEST_F(TestExecutor, spin_some_fail_wait_set_resize) { @@ -401,7 +363,7 @@ TEST_F(TestExecutor, spin_some_fail_add_handles_to_wait_set) { RCL_RET_ERROR); RCLCPP_EXPECT_THROW_EQ( dummy.spin_some(std::chrono::milliseconds(1)), - std::runtime_error("Couldn't fill wait set")); + std::runtime_error("Couldn't fill wait set: error not set")); } TEST_F(TestExecutor, spin_some_fail_wait) { @@ -417,71 +379,6 @@ TEST_F(TestExecutor, spin_some_fail_wait) { std::runtime_error("rcl_wait() failed: error not set")); } -TEST_F(TestExecutor, get_node_by_group_null_group) { - DummyExecutor dummy; - ASSERT_EQ(nullptr, dummy.local_get_node_by_group(nullptr)); -} - -TEST_F(TestExecutor, get_node_by_group) { - DummyExecutor dummy; - auto node = std::make_shared("node", "ns"); - rclcpp::CallbackGroup::SharedPtr cb_group = node->create_callback_group( - rclcpp::CallbackGroupType::MutuallyExclusive); - dummy.add_callback_group(cb_group, node->get_node_base_interface(), false); - ASSERT_EQ(node->get_node_base_interface().get(), dummy.local_get_node_by_group(cb_group).get()); -} - -TEST_F(TestExecutor, get_node_by_group_not_found) { - DummyExecutor dummy; - auto node = std::make_shared("node", "ns"); - rclcpp::CallbackGroup::SharedPtr cb_group = node->create_callback_group( - rclcpp::CallbackGroupType::MutuallyExclusive); - ASSERT_EQ(nullptr, dummy.local_get_node_by_group(cb_group).get()); -} - -TEST_F(TestExecutor, get_group_by_timer_nullptr) { - DummyExecutor dummy; - ASSERT_EQ(nullptr, dummy.local_get_group_by_timer(nullptr)); -} - -TEST_F(TestExecutor, get_group_by_timer) { - DummyExecutor dummy; - auto node = std::make_shared("node", "ns"); - rclcpp::CallbackGroup::SharedPtr cb_group = node->create_callback_group( - rclcpp::CallbackGroupType::MutuallyExclusive); - auto timer = - node->create_wall_timer(std::chrono::milliseconds(1), [&]() {}, cb_group); - dummy.add_node(node); - - ASSERT_EQ(cb_group.get(), dummy.local_get_group_by_timer(timer).get()); -} - -TEST_F(TestExecutor, get_group_by_timer_with_deleted_group) { - DummyExecutor dummy; - auto node = std::make_shared("node", "ns"); - rclcpp::CallbackGroup::SharedPtr cb_group = node->create_callback_group( - rclcpp::CallbackGroupType::MutuallyExclusive); - auto timer = - node->create_wall_timer(std::chrono::milliseconds(1), [&]() {}, cb_group); - 
dummy.add_node(node); - - cb_group.reset(); - - ASSERT_EQ(nullptr, dummy.local_get_group_by_timer(timer).get()); -} - -TEST_F(TestExecutor, get_group_by_timer_add_callback_group) { - DummyExecutor dummy; - auto node = std::make_shared("node", "ns"); - rclcpp::CallbackGroup::SharedPtr cb_group = node->create_callback_group( - rclcpp::CallbackGroupType::MutuallyExclusive); - auto timer = - node->create_wall_timer(std::chrono::milliseconds(1), [&]() {}, cb_group); - dummy.add_callback_group(cb_group, node->get_node_base_interface(), false); - - ASSERT_EQ(cb_group.get(), dummy.local_get_group_by_timer(timer).get()); -} - TEST_F(TestExecutor, spin_until_future_complete_in_spin_until_future_complete) { DummyExecutor dummy; auto node = std::make_shared("node", "ns"); diff --git a/rclcpp/test/rclcpp/test_guard_condition.cpp b/rclcpp/test/rclcpp/test_guard_condition.cpp index 481051ccf9..1e72264869 100644 --- a/rclcpp/test/rclcpp/test_guard_condition.cpp +++ b/rclcpp/test/rclcpp/test_guard_condition.cpp @@ -164,3 +164,21 @@ TEST_F(TestGuardCondition, set_on_trigger_callback) { EXPECT_EQ(c1.load(), 2u); } } + +/* + * Testing that callback and waitset are both notified by triggering gc + */ +TEST_F(TestGuardCondition, callback_and_waitset) { + auto gc = std::make_shared(); + std::atomic c1 {0}; + auto increase_c1_cb = [&c1](size_t count_msgs) {c1 += count_msgs;}; + gc->set_on_trigger_callback(increase_c1_cb); + + rclcpp::WaitSet wait_set; + wait_set.add_guard_condition(gc); + + gc->trigger(); + + EXPECT_EQ(rclcpp::WaitResultKind::Ready, wait_set.wait(std::chrono::seconds(1)).kind()); + EXPECT_EQ(c1.load(), 1u); +} diff --git a/rclcpp/test/rclcpp/test_intra_process_manager.cpp b/rclcpp/test/rclcpp/test_intra_process_manager.cpp index 9d8df74352..eeb4715973 100644 --- a/rclcpp/test/rclcpp/test_intra_process_manager.cpp +++ b/rclcpp/test/rclcpp/test_intra_process_manager.cpp @@ -247,60 +247,6 @@ class Publisher : public PublisherBase } // namespace mock } // namespace rclcpp -namespace rclcpp -{ -namespace experimental -{ -namespace buffers -{ -namespace mock -{ -template< - typename MessageT, - typename Alloc = std::allocator, - typename MessageDeleter = std::default_delete> -class IntraProcessBuffer -{ -public: - using ConstMessageSharedPtr = std::shared_ptr; - using MessageUniquePtr = std::unique_ptr; - - RCLCPP_SMART_PTR_DEFINITIONS(IntraProcessBuffer) - - IntraProcessBuffer() - {} - - void add(ConstMessageSharedPtr msg) - { - message_ptr = reinterpret_cast(msg.get()); - shared_msg = msg; - } - - void add(MessageUniquePtr msg) - { - message_ptr = reinterpret_cast(msg.get()); - unique_msg = std::move(msg); - } - - void pop(std::uintptr_t & msg_ptr) - { - msg_ptr = message_ptr; - message_ptr = 0; - } - - // need to store the messages somewhere otherwise the memory address will be reused - ConstMessageSharedPtr shared_msg; - MessageUniquePtr unique_msg; - - std::uintptr_t message_ptr; -}; - -} // namespace mock -} // namespace buffers -} // namespace experimental -} // namespace rclcpp - - namespace rclcpp { namespace experimental diff --git a/rclcpp/test/rclcpp/test_publisher.cpp b/rclcpp/test/rclcpp/test_publisher.cpp index 558bc39912..62ac3d832e 100644 --- a/rclcpp/test/rclcpp/test_publisher.cpp +++ b/rclcpp/test/rclcpp/test_publisher.cpp @@ -698,6 +698,8 @@ TEST_F(TestPublisher, intra_process_transient_local) { EXPECT_EQ(1, pub_ipm_enabled_transient_local_disabled->get_intra_process_subscription_count()); EXPECT_EQ(0, 
pub_ipm_disabled_transient_local_disabled->get_intra_process_subscription_count());
+  /*
+  // JF: lowest_available_ipm_capacity API not available in our fork of rclcpp @ irobot/humble
   EXPECT_EQ(
     history_depth - 1u,
     pub_ipm_enabled_transient_local_enabled->lowest_available_ipm_capacity());
@@ -706,6 +708,7 @@ TEST_F(TestPublisher, intra_process_transient_local) {
     history_depth,
     pub_ipm_enabled_transient_local_disabled->lowest_available_ipm_capacity());
   EXPECT_EQ(0, pub_ipm_disabled_transient_local_disabled->lowest_available_ipm_capacity());
+  */
 
   EXPECT_TRUE(callback1.called);
   EXPECT_FALSE(callback2.called);
diff --git a/rclcpp/test/rclcpp/test_reinitialized_timers.cpp b/rclcpp/test/rclcpp/test_reinitialized_timers.cpp
new file mode 100644
index 0000000000..ab3e86b00f
--- /dev/null
+++ b/rclcpp/test/rclcpp/test_reinitialized_timers.cpp
@@ -0,0 +1,107 @@
+// Copyright 2024 iRobot Corporation. All Rights Reserved.
+
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <memory>
+#include <thread>
+#include <utility>
+
+#include "rclcpp/experimental/executors/events_executor/events_executor.hpp"
+#include "rclcpp/experimental/executors/events_executor/lock_free_events_queue.hpp"
+
+using rclcpp::experimental::executors::EventsExecutor;
+
+class TimersTest
+: public testing::Test
+{
+public:
+  void SetUp() override
+  {
+    rclcpp::init(0, nullptr);
+  }
+
+  void TearDown() override
+  {
+    rclcpp::shutdown();
+  }
+};
+
+TEST_F(TimersTest, TimersWithSamePeriod)
+{
+  auto timers_period = std::chrono::milliseconds(50);
+  auto node = std::make_shared<rclcpp::Node>("test_node");
+  auto events_queue =
+    std::make_unique<rclcpp::experimental::executors::LockFreeEventsQueue>();
+  auto executor = std::make_unique<EventsExecutor>(
+    std::move(events_queue), true, rclcpp::ExecutorOptions());
+
+  executor->add_node(node);
+
+  size_t count_1 = 0;
+  auto timer_1 = rclcpp::create_timer(
+    node,
+    node->get_clock(),
+    rclcpp::Duration(timers_period),
+    [&count_1]() {
+      count_1++;
+    });
+
+  size_t count_2 = 0;
+  auto timer_2 = rclcpp::create_timer(
+    node,
+    node->get_clock(),
+    rclcpp::Duration(timers_period),
+    [&count_2]() {
+      count_2++;
+    });
+
+  {
+    std::thread executor_thread([&executor]() {
+        executor->spin();
+      });
+
+    while (count_2 < 10u) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+    executor->cancel();
+    executor_thread.join();
+
+    EXPECT_GE(count_2, 10u);
+    EXPECT_LE(count_2 - count_1, 1u);
+  }
+
+  count_1 = 0;
+  timer_1 = rclcpp::create_timer(
+    node,
+    node->get_clock(),
+    rclcpp::Duration(timers_period),
+    [&count_1]() {
+      count_1++;
+    });
+
+  count_2 = 0;
+  timer_2 = rclcpp::create_timer(
+    node,
+    node->get_clock(),
+    rclcpp::Duration(timers_period),
+    [&count_2]() {
+      count_2++;
+    });
+
+  {
+    std::thread executor_thread([&executor]() {
+        executor->spin();
+      });
+
+    while (count_2 < 10u) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+    executor->cancel();
+    executor_thread.join();
+
+    EXPECT_GE(count_2, 10u);
+    EXPECT_LE(count_2 - count_1, 1u);
+  }
+}
+
+int main(int argc, char ** argv)
+{
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
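// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] The test above exercises a harness
// shape used repeatedly in this patch: spin an executor in a worker thread,
// assert from the test thread, then cancel and join. A reusable sketch of
// that shape, generic over the executor type (names are illustrative):

#include <thread>
#include <utility>

template<typename ExecutorT, typename AssertFn>
void run_while_spinning(ExecutorT & executor, AssertFn && asserts)
{
  std::thread spinner([&executor]() {executor.spin();});
  // Run the test body while the executor is spinning.
  std::forward<AssertFn>(asserts)();
  // Always cancel before joining, otherwise join() can block forever.
  executor.cancel();
  spinner.join();
}
// ---------------------------------------------------------------------------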
diff --git a/rclcpp/test/rclcpp/test_service.cpp b/rclcpp/test/rclcpp/test_service.cpp
index 90e535cb7a..85f89ecf61 100644
--- a/rclcpp/test/rclcpp/test_service.cpp
+++ b/rclcpp/test/rclcpp/test_service.cpp
@@ -177,7 +177,14 @@ TEST_F(TestService, basic_public_getters) {
   }
   rclcpp::AnyServiceCallback<test_msgs::srv::Empty> cb;
+  const rclcpp::Service<test_msgs::srv::Empty> base(
+    node_handle_int->get_node_base_interface(),
+    &service_handle, cb);
+  // Use get_service_handle specific to const service
+  std::shared_ptr<const rcl_service_t> const_service_handle = base.get_service_handle();
+  EXPECT_NE(nullptr, const_service_handle);
+  /*
   rclcpp::IntraProcessSetting ipc_setting;
   if (node_base_interface->get_use_intra_process_default()) {
     ipc_setting = rclcpp::IntraProcessSetting::Enable;
@@ -185,12 +192,14 @@ TEST_F(TestService, basic_public_getters) {
     ipc_setting = rclcpp::IntraProcessSetting::Disable;
   }
 
+  // FIXME: where is our Service constructor that supports ipc_setting?
   const rclcpp::Service<test_msgs::srv::Empty> base(
     node_handle_int->get_node_base_interface(),
     &service_handle, cb, ipc_setting);
   // Use get_service_handle specific to const service
   std::shared_ptr<const rcl_service_t> const_service_handle = base.get_service_handle();
   EXPECT_NE(nullptr, const_service_handle);
+  */
   EXPECT_EQ(
     RCL_RET_OK,
     rcl_service_fini(
diff --git a/rclcpp/test/rclcpp/test_timers_manager.cpp b/rclcpp/test/rclcpp/test_timers_manager.cpp
new file mode 100644
index 0000000000..0e49da08e1
--- /dev/null
+++ b/rclcpp/test/rclcpp/test_timers_manager.cpp
@@ -0,0 +1,407 @@
+// Copyright 2023 iRobot Corporation.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <functional>
+#include <memory>
+#include <thread>
+
+#include "rclcpp/contexts/default_context.hpp"
+#include "rclcpp/experimental/timers_manager.hpp"
+
+using namespace std::chrono_literals;
+
+using rclcpp::experimental::TimersManager;
+
+using CallbackT = std::function<void()>;
+using TimerT = rclcpp::WallTimer<CallbackT>;
+
+class TestTimersManager : public ::testing::Test
+{
+public:
+  void SetUp()
+  {
+    rclcpp::init(0, nullptr);
+  }
+
+  void TearDown()
+  {
+    rclcpp::shutdown();
+  }
+};
+
+static void execute_all_ready_timers(std::shared_ptr<TimersManager> timers_manager)
+{
+  bool head_was_ready = false;
+  do {
+    head_was_ready = timers_manager->execute_head_timer();
+  } while (head_was_ready);
+}
+
+TEST_F(TestTimersManager, empty_manager)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  EXPECT_EQ(std::chrono::nanoseconds::max(), timers_manager->get_head_timeout());
+  EXPECT_FALSE(timers_manager->execute_head_timer());
+  EXPECT_NO_THROW(timers_manager->clear());
+  EXPECT_NO_THROW(timers_manager->start());
+  EXPECT_NO_THROW(timers_manager->stop());
+}
+
+TEST_F(TestTimersManager, add_run_remove_timer)
+{
+  size_t t_runs = 0;
+  std::chrono::milliseconds timer_period(10);
+
+  auto t = TimerT::make_shared(
+    timer_period,
+    [&t_runs]() {
+      t_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+  std::weak_ptr<TimerT> t_weak = t;
+
+  // Add the timer to the timers manager
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+  timers_manager->add_timer(t);
+
+  // Sleep for 3 times the timer period
+  std::this_thread::sleep_for(3 * timer_period);
+
+  // The timer is executed only once, even though we slept for 3 times the period
+  execute_all_ready_timers(timers_manager);
+  EXPECT_EQ(1u, t_runs);
+
+  // Remove the timer from the manager
+  timers_manager->remove_timer(t);
+
+  t.reset();
+  // The timer is now not valid anymore
+  EXPECT_FALSE(t_weak.lock() != nullptr);
+}
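// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] Several tests in this file hold a
// std::weak_ptr alongside the shared_ptr they hand to the TimersManager, so
// that after reset()/clear() they can assert the manager dropped its strong
// reference too. The idiom in isolation:

#include <cassert>
#include <memory>

inline void weak_ptr_ownership_demo()
{
  auto owned = std::make_shared<int>(42);
  std::weak_ptr<int> observer = owned;

  assert(observer.lock() != nullptr);  // still alive: one strong reference

  owned.reset();  // drop the last strong reference

  // lock() now returns an empty shared_ptr; this is exactly what
  // EXPECT_FALSE(t_weak.lock() != nullptr) checks in the tests above.
  assert(observer.lock() == nullptr);
}
// ---------------------------------------------------------------------------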
+
+TEST_F(TestTimersManager, clear)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  auto t1 = TimerT::make_shared(1ms, CallbackT(), rclcpp::contexts::get_global_default_context());
+  std::weak_ptr<TimerT> t1_weak = t1;
+  auto t2 = TimerT::make_shared(1ms, CallbackT(), rclcpp::contexts::get_global_default_context());
+  std::weak_ptr<TimerT> t2_weak = t2;
+
+  timers_manager->add_timer(t1);
+  timers_manager->add_timer(t2);
+
+  EXPECT_TRUE(t1_weak.lock() != nullptr);
+  EXPECT_TRUE(t2_weak.lock() != nullptr);
+
+  timers_manager->clear();
+
+  t1.reset();
+  t2.reset();
+
+  EXPECT_FALSE(t1_weak.lock() != nullptr);
+  EXPECT_FALSE(t2_weak.lock() != nullptr);
+}
+
+TEST_F(TestTimersManager, remove_not_existing_timer)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  // Try to remove a nullptr timer
+  EXPECT_NO_THROW(timers_manager->remove_timer(nullptr));
+
+  auto t = TimerT::make_shared(1ms, CallbackT(), rclcpp::contexts::get_global_default_context());
+  timers_manager->add_timer(t);
+
+  // Remove the same timer twice
+  timers_manager->remove_timer(t);
+  EXPECT_NO_THROW(timers_manager->remove_timer(t));
+}
+
+TEST_F(TestTimersManager, timers_thread_exclusive_usage)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  timers_manager->start();
+
+  EXPECT_THROW(timers_manager->start(), std::exception);
+  EXPECT_THROW(timers_manager->get_head_timeout(), std::exception);
+  EXPECT_THROW(timers_manager->execute_head_timer(), std::exception);
+
+  timers_manager->stop();
+
+  EXPECT_NO_THROW(timers_manager->get_head_timeout());
+  EXPECT_NO_THROW(timers_manager->execute_head_timer());
+}
+
+TEST_F(TestTimersManager, add_timer_twice)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  auto t = TimerT::make_shared(1ms, CallbackT(), rclcpp::contexts::get_global_default_context());
+
+  timers_manager->add_timer(t);
+  EXPECT_NO_THROW(timers_manager->add_timer(t));
+}
+
+TEST_F(TestTimersManager, add_nullptr)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  EXPECT_THROW(timers_manager->add_timer(nullptr), std::exception);
+}
+
+TEST_F(TestTimersManager, head_not_ready)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t_runs = 0;
+  auto t = TimerT::make_shared(
+    10s,
+    [&t_runs]() {
+      t_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  timers_manager->add_timer(t);
+
+  // Timer will take 10s to get ready, so nothing to execute here
+  bool ret = timers_manager->execute_head_timer();
+  EXPECT_FALSE(ret);
+  EXPECT_EQ(0u, t_runs);
+}
+
+TEST_F(TestTimersManager, start_stop_timers_thread)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  auto t = TimerT::make_shared(1ms, []() {}, rclcpp::contexts::get_global_default_context());
+  timers_manager->add_timer(t);
+
+  // Calling start multiple times will throw an error
+  EXPECT_NO_THROW(timers_manager->start());
+  EXPECT_THROW(timers_manager->start(), std::exception);
+
+  // Calling stop multiple times does not throw an error
+  EXPECT_NO_THROW(timers_manager->stop());
+  EXPECT_NO_THROW(timers_manager->stop());
+}
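// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] get_head_timeout() and
// execute_head_timer() suggest the TimersManager keeps timers ordered by
// expiry, so only the earliest deadline (the "head") ever needs inspecting.
// A minimal self-contained sketch of that ordering with a std::priority_queue
// (illustrative only, not the TimersManager implementation):

#include <chrono>
#include <functional>
#include <queue>
#include <vector>

struct SketchTimer
{
  std::chrono::steady_clock::time_point deadline;
  std::function<void()> callback;
};

struct LaterDeadlineFirst
{
  bool operator()(const SketchTimer & a, const SketchTimer & b) const
  {
    return a.deadline > b.deadline;  // makes the queue a min-heap on deadline
  }
};

using TimerHeap =
  std::priority_queue<SketchTimer, std::vector<SketchTimer>, LaterDeadlineFirst>;

// Execute the head timer if it is ready; mirrors execute_head_timer()'s
// "return whether the head was ready" contract.
inline bool execute_head_timer(TimerHeap & heap, std::chrono::milliseconds period)
{
  if (heap.empty() || heap.top().deadline > std::chrono::steady_clock::now()) {
    return false;
  }
  SketchTimer t = heap.top();
  heap.pop();
  t.callback();
  t.deadline += period;  // re-arm for the next period
  heap.push(t);
  return true;
}
// ---------------------------------------------------------------------------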
+
+TEST_F(TestTimersManager, timers_thread)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  int t1_runs = 0;
+  auto t1 = TimerT::make_shared(
+    1ms,
+    [&t1_runs]() {
+      t1_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  int t2_runs = 0;
+  auto t2 = TimerT::make_shared(
+    1ms,
+    [&t2_runs]() {
+      t2_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  // Add timers
+  timers_manager->add_timer(t1);
+  timers_manager->add_timer(t2);
+
+  // Run timers thread for a while
+  timers_manager->start();
+  std::this_thread::sleep_for(50ms);
+  timers_manager->stop();
+
+  EXPECT_LT(1u, t1_runs);
+  EXPECT_LT(1u, t2_runs);
+  EXPECT_LE(std::abs(t1_runs - t2_runs), 1);
+}
+
+TEST_F(TestTimersManager, destructor)
+{
+  size_t t_runs = 0;
+  auto t = TimerT::make_shared(
+    1ms,
+    [&t_runs]() {
+      t_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+  std::weak_ptr<TimerT> t_weak = t;
+
+  // When the timers manager is destroyed, it will stop the thread
+  // and clear the timers
+  {
+    auto timers_manager = std::make_shared<TimersManager>(
+      rclcpp::contexts::get_global_default_context());
+
+    timers_manager->add_timer(t);
+
+    timers_manager->start();
+    std::this_thread::sleep_for(100ms);
+
+    EXPECT_LT(1u, t_runs);
+  }
+
+  // The thread is not running anymore, so this value does not increase
+  size_t runs = t_runs;
+  std::this_thread::sleep_for(100ms);
+  EXPECT_EQ(runs, t_runs);
+  t.reset();
+  EXPECT_FALSE(t_weak.lock() != nullptr);
+}
+
+TEST_F(TestTimersManager, add_remove_while_thread_running)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t1_runs = 0;
+  auto t1 = TimerT::make_shared(
+    1ms,
+    [&t1_runs]() {
+      t1_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t2_runs = 0;
+  auto t2 = TimerT::make_shared(
+    1ms,
+    [&t2_runs]() {
+      t2_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  // Add timers
+  timers_manager->add_timer(t1);
+
+  // Start timers thread
+  timers_manager->start();
+
+  // After a while remove t1 and add t2
+  std::this_thread::sleep_for(50ms);
+  timers_manager->remove_timer(t1);
+  size_t tmp_t1 = t1_runs;
+  timers_manager->add_timer(t2);
+
+  // Wait some more time and then stop
+  std::this_thread::sleep_for(50ms);
+  timers_manager->stop();
+
+  // t1 has stopped running
+  EXPECT_EQ(tmp_t1, t1_runs);
+  // t2 is correctly running
+  EXPECT_LT(1u, t2_runs);
+}
+
+TEST_F(TestTimersManager, infinite_loop)
+{
+  // This test makes sure that, even if timers have a period shorter than the
+  // duration of their callback, the functions never block indefinitely.
+
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t1_runs = 0;
+  auto t1 = TimerT::make_shared(
+    1ms,
+    [&t1_runs]() {
+      t1_runs++;
+      std::this_thread::sleep_for(5ms);
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t2_runs = 0;
+  auto t2 = TimerT::make_shared(
+    1ms,
+    [&t2_runs]() {
+      t2_runs++;
+      std::this_thread::sleep_for(5ms);
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  timers_manager->add_timer(t1);
+  timers_manager->add_timer(t2);
+
+  // Start a timers thread and make sure that we can stop it later
+  timers_manager->start();
+  std::this_thread::sleep_for(50ms);
+  timers_manager->stop();
+
+  EXPECT_LT(0u, t1_runs);
+  EXPECT_LT(0u, t2_runs);
+}
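// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] The infinite_loop test above pins
// down a scheduling rule: when a callback takes longer than the timer period,
// the next deadline must move forward past "now" instead of scheduling every
// missed tick, or the timer thread never catches up. A sketch of one
// non-blocking variant of that rule (illustrative, not the TimersManager
// implementation):

#include <chrono>

inline std::chrono::steady_clock::time_point next_deadline_after(
  std::chrono::steady_clock::time_point previous_deadline,
  std::chrono::steady_clock::time_point now,
  std::chrono::nanoseconds period)
{
  auto next = previous_deadline + period;
  if (next < now) {
    // The callback overran one or more periods: skip the missed ticks rather
    // than queueing them all, so the loop can never fall behind forever.
    auto missed = (now - next) / period + 1;
    next += missed * period;
  }
  return next;
}
// ---------------------------------------------------------------------------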
+
+// Validate that cancelling one timer yields no change in behavior for other
+// timers.
+TEST_F(TestTimersManager, check_one_timer_cancel_doesnt_affect_other_timers)
+{
+  auto timers_manager = std::make_shared<TimersManager>(
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t1_runs = 0;
+  std::shared_ptr<TimerT> t1;
+  // After a while cancel t1. Don't remove it though.
+  // Simulates typical usage in a Node where a timer is cancelled but not removed,
+  // since typical users aren't going to mess around with the timers manager.
+  t1 = TimerT::make_shared(
+    1ms,
+    [&t1_runs, &t1]() {
+      t1_runs++;
+      if (t1_runs == 5) {
+        t1->cancel();
+      }
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  size_t t2_runs = 0;
+  auto t2 = TimerT::make_shared(
+    1ms,
+    [&t2_runs]() {
+      t2_runs++;
+    },
+    rclcpp::contexts::get_global_default_context());
+
+  // Add timers
+  timers_manager->add_timer(t1);
+  timers_manager->add_timer(t2);
+
+  // Start timers thread
+  timers_manager->start();
+
+  std::this_thread::sleep_for(15ms);
+
+  // t1 has stopped running
+  EXPECT_NE(t1_runs, t2_runs);
+  // Check that t2 has significantly more calls
+  EXPECT_LT(t1_runs + 5, t2_runs);
+  timers_manager->stop();
+}
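// ---------------------------------------------------------------------------
// [Editor's aside, not part of the patch] The three new test files added by
// this patch (test_executors_timer_cancel_behavior.cpp,
// test_reinitialized_timers.cpp, test_timers_manager.cpp) are gtest binaries.
// Once registered in the package's CMake they can be run selectively, e.g.
// (assuming a standard colcon workspace):
//
//   colcon test --packages-select rclcpp --ctest-args -R timers_manager
//   colcon test-result --verbose
// ---------------------------------------------------------------------------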