acts-project · stephenswat · Aug 18, 2025 · Aug 18, 2025 · Aug 18, 2025 · Aug 18, 2025
diff --git a/examples/run/alpaka/full_chain_algorithm.cpp b/examples/run/alpaka/full_chain_algorithm.cpp
@@ -29,9 +29,8 @@ full_chain_algorithm::full_chain_algorithm(
       m_queue(),
       m_vecmem_objects(m_queue),
       m_host_mr(host_mr),
-      m_cached_device_mr(
-          std::make_unique<::vecmem::binary_page_memory_resource>(
-              m_vecmem_objects.device_mr())),
+      m_cached_pinned_host_mr(m_vecmem_objects.host_mr()),
+      m_cached_device_mr(m_vecmem_objects.device_mr()),
       m_field_vec{0.f, 0.f, finder_config.bFieldInZ},
       m_field(field),
       m_det_descr(det_descr),
@@ -40,29 +39,27 @@ full_chain_algorithm::full_chain_algorithm(
               m_det_descr.get().size()),
           m_vecmem_objects.device_mr()),
       m_detector(detector),
-      m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_clusterization({m_cached_device_mr, &m_cached_pinned_host_mr},
                        m_vecmem_objects.async_copy(), m_queue,
                        clustering_config),
-      m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_measurement_sorting({m_cached_device_mr, &m_cached_pinned_host_mr},
                             m_vecmem_objects.async_copy(), m_queue,
                             logger->cloneWithSuffix("MeasSortingAlg")),
-      m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_spacepoint_formation({m_cached_device_mr, &m_cached_pinned_host_mr},
                              m_vecmem_objects.async_copy(), m_queue,
                              logger->cloneWithSuffix("SpFormationAlg")),
       m_seeding(finder_config, grid_config, filter_config,
-                memory_resource{*m_cached_device_mr, &m_host_mr},
+                {m_cached_device_mr, &m_cached_pinned_host_mr},
                 m_vecmem_objects.async_copy(), m_queue,
                 logger->cloneWithSuffix("SeedingAlg")),
       m_track_parameter_estimation(
-          memory_resource{*m_cached_device_mr, &m_host_mr},
+          {m_cached_device_mr, &m_cached_pinned_host_mr},
           m_vecmem_objects.async_copy(), m_queue,
           logger->cloneWithSuffix("TrackParamEstAlg")),
-      m_finding(finding_config,
-                memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_finding(finding_config, {m_cached_device_mr, &m_cached_pinned_host_mr},
                 m_vecmem_objects.async_copy(), m_queue,
                 logger->cloneWithSuffix("TrackFindingAlg")),
-      m_fitting(fitting_config,
-                memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_fitting(fitting_config, {m_cached_device_mr, &m_cached_pinned_host_mr},
                 m_vecmem_objects.async_copy(), m_queue,
                 logger->cloneWithSuffix("TrackFittingAlg")),
       m_clustering_config(clustering_config),
@@ -92,9 +89,8 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
       m_queue(),
       m_vecmem_objects(m_queue),
       m_host_mr(parent.m_host_mr),
-      m_cached_device_mr(
-          std::make_unique<::vecmem::binary_page_memory_resource>(
-              m_vecmem_objects.device_mr())),
+      m_cached_pinned_host_mr(m_vecmem_objects.host_mr()),
+      m_cached_device_mr(m_vecmem_objects.device_mr()),
       m_field_vec(parent.m_field_vec),
       m_field(parent.m_field),
       m_det_descr(parent.m_det_descr),
@@ -103,30 +99,30 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
               m_det_descr.get().size()),
           m_vecmem_objects.device_mr()),
       m_detector(parent.m_detector),
-      m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_clusterization({m_cached_device_mr, &m_cached_pinned_host_mr},
                        m_vecmem_objects.async_copy(), m_queue,
                        parent.m_clustering_config),
-      m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_measurement_sorting({m_cached_device_mr, &m_cached_pinned_host_mr},
                             m_vecmem_objects.async_copy(), m_queue,
                             parent.logger().cloneWithSuffix("MeasSortingAlg")),
-      m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
+      m_spacepoint_formation({m_cached_device_mr, &m_cached_pinned_host_mr},
                              m_vecmem_objects.async_copy(), m_queue,
                              parent.logger().cloneWithSuffix("SpFormationAlg")),
       m_seeding(parent.m_finder_config, parent.m_grid_config,
                 parent.m_filter_config,
-                memory_resource{*m_cached_device_mr, &m_host_mr},
+                {m_cached_device_mr, &m_cached_pinned_host_mr},
                 m_vecmem_objects.async_copy(), m_queue,
                 parent.logger().cloneWithSuffix("SeedingAlg")),
       m_track_parameter_estimation(
-          memory_resource{*m_cached_device_mr, &m_host_mr},
+          {m_cached_device_mr, &m_cached_pinned_host_mr},
           m_vecmem_objects.async_copy(), m_queue,
           parent.logger().cloneWithSuffix("TrackParamEstAlg")),
       m_finding(parent.m_finding_config,
-                memory_resource{*m_cached_device_mr, &m_host_mr},
+                {m_cached_device_mr, &m_cached_pinned_host_mr},
                 m_vecmem_objects.async_copy(), m_queue,
                 parent.logger().cloneWithSuffix("TrackFindingAlg")),
       m_fitting(parent.m_fitting_config,
-                memory_resource{*m_cached_device_mr, &m_host_mr},
+                {m_cached_device_mr, &m_cached_pinned_host_mr},
                 m_vecmem_objects.async_copy(), m_queue,
                 parent.logger().cloneWithSuffix("TrackFittingAlg")),
       m_clustering_config(parent.m_clustering_config),
@@ -156,7 +152,7 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(
 
     // Create device copy of input collections
     edm::silicon_cell_collection::buffer cells_buffer(
-        static_cast<unsigned int>(cells.size()), *m_cached_device_mr);
+        static_cast<unsigned int>(cells.size()), m_cached_device_mr);
     m_vecmem_objects.async_copy()(::vecmem::get_data(cells), cells_buffer)
         ->ignore();
 
@@ -184,8 +180,12 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(
             m_device_detector_view, m_field, {track_candidates, measurements});
 
         // Copy a limited amount of result data back to the host.
+        const auto host_tracks = m_vecmem_objects.async_copy().to(
+            track_states.tracks, m_cached_pinned_host_mr, nullptr,
+            ::vecmem::copy::type::device_to_host);
         output_type result{m_host_mr};
-        m_vecmem_objects.async_copy()(track_states.tracks, result)->wait();
+        ::vecmem::copy host_copy;
+        host_copy(host_tracks, result)->wait();
         return result;
 
     }
@@ -207,7 +207,7 @@ bound_track_parameters_collection_types::host full_chain_algorithm::seeding(
 
     // Create device copy of input collections
     edm::silicon_cell_collection::buffer cells_buffer(
-        static_cast<unsigned int>(cells.size()), *m_cached_device_mr);
+        static_cast<unsigned int>(cells.size()), m_cached_device_mr);
     m_vecmem_objects.async_copy()(::vecmem::get_data(cells), cells_buffer)
         ->ignore();
 
@@ -227,8 +227,12 @@ bound_track_parameters_collection_types::host full_chain_algorithm::seeding(
                                          m_seeding(spacepoints), m_field_vec);
 
         // Copy a limited amount of result data back to the host.
+        const auto host_seeds = m_vecmem_objects.async_copy().to(
+            track_params, m_cached_pinned_host_mr,
+            ::vecmem::copy::type::device_to_host);
         bound_track_parameters_collection_types::host result{&m_host_mr};
-        m_vecmem_objects.async_copy()(track_params, result)->wait();
+        ::vecmem::copy host_copy;
+        host_copy(host_seeds, result)->wait();
         return result;
 
     }

diff --git a/examples/run/alpaka/full_chain_algorithm.hpp b/examples/run/alpaka/full_chain_algorithm.hpp
@@ -125,8 +125,10 @@ class full_chain_algorithm
 
     /// Host memory resource
     ::vecmem::memory_resource& m_host_mr;
+    /// Cached pinned host memory resource
+    mutable ::vecmem::binary_page_memory_resource m_cached_pinned_host_mr;
     /// Device caching memory resource
-    std::unique_ptr<::vecmem::binary_page_memory_resource> m_cached_device_mr;
+    mutable ::vecmem::binary_page_memory_resource m_cached_device_mr;
 
     /// Constant B field for the (seed) track parameter estimation
     traccc::vector3 m_field_vec;

diff --git a/examples/run/alpaka/throughput_mt.cpp b/examples/run/alpaka/throughput_mt.cpp
@@ -13,9 +13,6 @@
 int main(int argc, char* argv[]) {
 
     // Execute the throughput test.
-    static const bool use_host_caching = true;
-    return traccc::throughput_mt<traccc::alpaka::full_chain_algorithm,
-                                 vecmem::host_memory_resource>(
-        "Multi-threaded Alpaka GPU throughput tests", argc, argv,
-        use_host_caching);
+    return traccc::throughput_mt<traccc::alpaka::full_chain_algorithm>(
+        "Multi-threaded Alpaka GPU throughput tests", argc, argv);
 }
diff --git a/examples/run/alpaka/throughput_st.cpp b/examples/run/alpaka/throughput_st.cpp
@@ -13,9 +13,6 @@
 int main(int argc, char* argv[]) {
 
     // Execute the throughput test.
-    static const bool use_host_caching = true;
-    return traccc::throughput_st<traccc::alpaka::full_chain_algorithm,
-                                 vecmem::host_memory_resource>(
-        "Single-threaded Alpaka GPU throughput tests", argc, argv,
-        use_host_caching);
+    return traccc::throughput_st<traccc::alpaka::full_chain_algorithm>(
+        "Single-threaded Alpaka GPU throughput tests", argc, argv);
 }
diff --git a/examples/run/common/throughput_mt.hpp b/examples/run/common/throughput_mt.hpp
@@ -1,15 +1,12 @@
 /** TRACCC library, part of the ACTS project (R&D line)
  *
- * (c) 2022 CERN for the benefit of the ACTS project
+ * (c) 2022-2025 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
 
 #pragma once
 
-// VecMem include(s).
-#include <vecmem/memory/host_memory_resource.hpp>
-
 // System include(s).
 #include <string_view>
 
@@ -18,18 +15,15 @@ namespace traccc {
 /// Helper function running a multi-threaded throughput test
 ///
 /// @tparam FULL_CHAIN_ALG The type of the full chain algorithm to use
-/// @tparam HOST_MR The host memory resource type to use
+///
 /// @param description A short description of the application
 /// @param argc The count of command line arguments (from @c main(...))
 /// @param argv The command line arguments (from @c main(...))
-/// @param use_host_caching Flag specifying whether host-side memory caching
-///                         should be used
+///
 /// @return The value to be returned from @c main(...)
 ///
-template <typename FULL_CHAIN_ALG,
-          typename HOST_MR = vecmem::host_memory_resource>
-int throughput_mt(std::string_view description, int argc, char* argv[],
-                  bool use_host_caching = false);
+template <typename FULL_CHAIN_ALG>
+int throughput_mt(std::string_view description, int argc, char* argv[]);
 
 }  // namespace traccc
 

diff --git a/examples/run/common/throughput_mt.ipp b/examples/run/common/throughput_mt.ipp
@@ -38,7 +38,7 @@
 #include "traccc/performance/timing_info.hpp"
 
 // VecMem include(s).
-#include <vecmem/memory/binary_page_memory_resource.hpp>
+#include <vecmem/memory/host_memory_resource.hpp>
 
 // TBB include(s).
 #include <tbb/global_control.h>
@@ -61,9 +61,9 @@
 
 namespace traccc {
 
-template <typename FULL_CHAIN_ALG, typename HOST_MR>
-int throughput_mt(std::string_view description, int argc, char* argv[],
-                  bool use_host_caching) {
+template <typename FULL_CHAIN_ALG>
+int throughput_mt(std::string_view description, int argc, char* argv[]) {
+
     std::unique_ptr<const traccc::Logger> ilogger = traccc::getDefaultLogger(
         "ThroughputExample", traccc::Logging::Level::INFO);
     TRACCC_LOCAL_LOGGER(std::move(ilogger));
@@ -92,36 +92,36 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
     performance::timing_info times;
 
     // Memory resource to use in the test.
-    HOST_MR uncached_host_mr;
+    vecmem::host_memory_resource host_mr;
 
     // Construct the detector description object.
-    traccc::silicon_detector_description::host det_descr{uncached_host_mr};
+    traccc::silicon_detector_description::host det_descr{host_mr};
     traccc::io::read_detector_description(
         det_descr, detector_opts.detector_file, detector_opts.digitization_file,
         (detector_opts.use_detray_detector ? traccc::data_format::json
                                            : traccc::data_format::csv));
 
     // Construct a Detray detector object, if supported by the configuration.
-    traccc::default_detector::host detector{uncached_host_mr};
+    traccc::default_detector::host detector{host_mr};
     if (detector_opts.use_detray_detector) {
         traccc::io::read_detector(
-            detector, uncached_host_mr, detector_opts.detector_file,
+            detector, host_mr, detector_opts.detector_file,
             detector_opts.material_file, detector_opts.grid_file);
     }
 
     // Construct the magnetic field object.
     const auto field = details::make_magnetic_field(bfield_opts);
 
     // Read in all input events into memory.
-    vecmem::vector<edm::silicon_cell_collection::host> input{&uncached_host_mr};
+    vecmem::vector<edm::silicon_cell_collection::host> input{&host_mr};
     {
         performance::timer t{"File reading", times};
         // Set up the container for the input events.
         input.reserve(input_opts.events);
         const std::size_t first_event = input_opts.skip;
         const std::size_t last_event = input_opts.skip + input_opts.events;
         for (std::size_t i = first_event; i < last_event; ++i) {
-            input.emplace_back(uncached_host_mr);
+            input.emplace_back(host_mr);
         }
         // Read the input cells into memory in parallel.
         tbb::parallel_for(
@@ -138,19 +138,6 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
             });
     }
 
-    // Set up cached memory resources on top of the host memory resource
-    // separately for each CPU thread.
-    std::vector<std::unique_ptr<vecmem::binary_page_memory_resource> >
-        cached_host_mrs;
-    if (use_host_caching) {
-        cached_host_mrs.reserve(threading_opts.threads + 1);
-        for (std::size_t i = 0; i < threading_opts.threads + 1; ++i) {
-            cached_host_mrs.push_back(
-                std::make_unique<vecmem::binary_page_memory_resource>(
-                    uncached_host_mr));
-        }
-    }
-
     // Algorithm configuration(s).
     typename FULL_CHAIN_ALG::clustering_algorithm::config_type clustering_cfg(
         clusterization_opts);
@@ -170,16 +157,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
     std::vector<FULL_CHAIN_ALG> algs;
     algs.reserve(threading_opts.threads + 1);
     for (std::size_t i = 0; i < threading_opts.threads + 1; ++i) {
-
-        vecmem::memory_resource& alg_host_mr =
-            use_host_caching
-                ? static_cast<vecmem::memory_resource&>(
-                      *(cached_host_mrs.at(i)))
-                : static_cast<vecmem::memory_resource&>(uncached_host_mr);
         algs.push_back(
-            {alg_host_mr, clustering_cfg, seedfinder_config,
-             spacepoint_grid_config, seedfilter_config, finding_cfg,
-             fitting_cfg, det_descr, field,
+            {host_mr, clustering_cfg, seedfinder_config, spacepoint_grid_config,
+             seedfilter_config, finding_cfg, fitting_cfg, det_descr, field,
              (detector_opts.use_detray_detector ? &detector : nullptr),
              logger().clone()});
     }
@@ -304,10 +284,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
         group.wait();
     }
 
-    // Delete the algorithms and host memory caches explicitly before their
-    // parent object would go out of scope.
+    // Delete the algorithms explicitly before their parent object would go out
+    // of scope.
     algs.clear();
-    cached_host_mrs.clear();
 
     // Print some results.
     TRACCC_INFO("Reconstructed track parameters: " << rec_track_params.load());

diff --git a/examples/run/common/throughput_st.hpp b/examples/run/common/throughput_st.hpp
@@ -1,18 +1,12 @@
 /** TRACCC library, part of the ACTS project (R&D line)
  *
- * (c) 2022 CERN for the benefit of the ACTS project
+ * (c) 2022-2025 CERN for the benefit of the ACTS project
  *
  * Mozilla Public License Version 2.0
  */
 
 #pragma once
 
-// Projection include(s).
-#include "traccc/seeding/detail/seeding_config.hpp"
-
-// VecMem include(s).
-#include <vecmem/memory/host_memory_resource.hpp>
-
 // System include(s).
 #include <string_view>
 
@@ -21,18 +15,15 @@ namespace traccc {
 /// Helper function running a single-threaded throughput test
 ///
 /// @tparam FULL_CHAIN_ALG The type of the full chain algorithm to use
-/// @tparam HOST_MR The host memory resource type to use
+///
 /// @param description A short description of the application
 /// @param argc The count of command line arguments (from @c main(...))
 /// @param argv The command line arguments (from @c main(...))
-/// @param use_host_caching Flag specifying whether host-side memory caching
-///                         should be used
+///
 /// @return The value to be returned from @c main(...)
 ///
-template <typename FULL_CHAIN_ALG,
-          typename HOST_MR = vecmem::host_memory_resource>
-int throughput_st(std::string_view description, int argc, char* argv[],
-                  bool use_host_caching = false);
+template <typename FULL_CHAIN_ALG>
+int throughput_st(std::string_view description, int argc, char* argv[]);
 
 }  // namespace traccc