Producer consumer impl

niermann999 · niermann999 · commit 78c05eebdf1f · 2025-08-05T13:49:40.000+02:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -27,6 +27,13 @@ if(${CMAKE_CXX_STANDARD} LESS 20)
     )
 endif()
 
+# Default CUDA architecture(s) for device code (cache entry, user-overridable).
+set(CMAKE_CUDA_ARCHITECTURES
+    75
+    CACHE STRING
+    "CUDA architectures to build device code for"
+)
+
 # CMake include(s).
 include(CMakeDependentOption)
 include(GNUInstallDirs)
diff --git a/cmake/detray-compiler-options-cuda.cmake b/cmake/detray-compiler-options-cuda.cmake
@@ -19,13 +19,6 @@ if(PROJECT_IS_TOP_LEVEL)
         detray_add_flag( CMAKE_CUDA_FLAGS "-Xcompiler /Zc:__cplusplus" )
     endif()
 
-    # Set the CUDA architecture to build code for.
-    set(CMAKE_CUDA_ARCHITECTURES
-        "52"
-        CACHE STRING
-        "CUDA architectures to build device code for"
-    )
-
     if("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
         # Allow to use functions in device code that are constexpr, even if they are
         # not marked with __device__.
diff --git a/core/include/detray/propagator/rk_stepper.hpp b/core/include/detray/propagator/rk_stepper.hpp
@@ -161,7 +161,7 @@ class rk_stepper final
         scalar_type m_next_step_size{0.f};
 
         /// Magnetic field view
-        const magnetic_field_t m_magnetic_field;
+        magnetic_field_t m_magnetic_field;
     };
 
     /// Take a step, using an adaptive Runge-Kutta algorithm.
diff --git a/tests/benchmarks/cuda/CMakeLists.txt b/tests/benchmarks/cuda/CMakeLists.txt
@@ -38,3 +38,13 @@ foreach(algebra ${algebra_plugins})
         PRIVATE "-march=native" "-ftree-vectorize"
     )
 endforeach()
+
+detray_add_executable(cuda_propagation
+    "propagation_new.cpp"
+    LINK_LIBRARIES detray::benchmark_cuda_array detray::core_array vecmem::cuda detray::test_common
+)
+# Same host compiler flags as the per-algebra benchmark targets in the loop above.
+target_compile_options(
+    detray_cuda_propagation
+    PRIVATE "-march=native" "-ftree-vectorize"
+)
diff --git a/tests/benchmarks/cuda/propagation_new.cpp b/tests/benchmarks/cuda/propagation_new.cpp
@@ -0,0 +1,122 @@
+/** Detray library, part of the ACTS project (R&D line)
+ *
+ * (c) 2024 CERN for the benefit of the ACTS project
+ *
+ * Mozilla Public License Version 2.0
+ */
+
+// Project include(s)
+#include "detray/navigation/navigator.hpp"
+#include "detray/propagator/actors.hpp"
+#include "detray/propagator/rk_stepper.hpp"
+#include "detray/tracks/tracks.hpp"
+
+// Detray benchmark include(s)
+#include "detray/benchmarks/device/cuda/propagator.hpp"
+#include "detray/benchmarks/propagation_benchmark_utils.hpp"
+#include "detray/benchmarks/types.hpp"
+
+// Detray test include(s)
+#include "detray/test/common/bfield.hpp"
+#include "detray/test/common/build_toy_detector.hpp"
+#include "detray/test/common/track_generators.hpp"
+
+// Vecmem include(s)
+#include <vecmem/memory/cuda/device_memory_resource.hpp>
+#include <vecmem/memory/host_memory_resource.hpp>
+
+// System include(s)
+#include <chrono>
+#include <ctime>
+#include <iostream>
+#include <ratio>
+#include <string>
+
+using namespace detray;
+
+int main() {
+    // Stand-alone driver: times one CUDA propagation on the toy detector.
+    using metadata_t = benchmarks::toy_metadata;
+    using toy_detector_t = detector<metadata_t>;
+    using algebra_t = typename toy_detector_t::algebra_type;
+    using scalar = dscalar<algebra_t>;
+    using vector3 = dvector3D<algebra_t>;
+
+    using free_track_parameters_t = free_track_parameters<algebra_t>;
+    using uniform_gen_t =
+        detail::random_numbers<scalar, std::uniform_real_distribution<scalar>>;
+    using track_generator_t =
+        random_track_generator<free_track_parameters_t, uniform_gen_t>;
+    using field_bknd_t = bfield::const_bknd_t<benchmarks::scalar>;
+    // Memory resources: one for the host, one for the CUDA device
+    vecmem::host_memory_resource host_mr;
+    vecmem::cuda::device_memory_resource dev_mr;
+
+    //
+    // Configuration
+    //
+
+    // Constant magnetic field of 2 T along z
+    vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
+
+    // Configure toy detector: no material maps, 4 barrel and 7 endcap layers
+    toy_det_config<scalar> toy_cfg{};
+    toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u);
+
+    std::cout << toy_cfg << std::endl;
+
+    // Configure propagation (navigation search window of 3 x 3)
+    propagation::config prop_cfg{};
+    prop_cfg.navigation.search_window = {3u, 3u};
+
+    std::cout << prop_cfg << std::endl;
+
+    //
+    // Prepare data
+    //
+    // Generate a sample of 10 random tracks using the default seed
+    track_generator_t::configuration trk_cfg{};
+    trk_cfg.n_tracks(10u);
+    trk_cfg.seed(detail::random_numbers<scalar>::default_seed());
+
+    track_generator_t trk_gen{trk_cfg};
+
+    dvector<free_track_parameters_t> single_sample =
+        detray::benchmarks::generate_tracks(&host_mr, trk_gen);
+    // Build the toy detector in host memory
+    const auto [toy_det, names] =
+        build_toy_detector<algebra_t>(host_mr, toy_cfg);
+
+    auto bfield = create_const_field<scalar>(B);
+
+    pointwise_material_interactor<algebra_t>::state interactor_state{};
+
+    auto actor_states = detail::make_tuple<dtuple>(interactor_state);
+
+    //
+    // Run and time the propagation
+    //
+    std::cout << "----------------------\n"
+              << "Propagation Test\n"
+              << "----------------------\n\n";
+
+    using navigator_t = navigator_type<metadata_t>;
+    using stepper_t = stepper_type<metadata_t, field_bknd_t>;
+    using actor_chain_t = default_chain<algebra_t>;
+    // Enable covariance transport and set up the CUDA propagation object
+    prop_cfg.stepping.do_covariance_transport = true;
+    cuda_propagation<navigator_t, stepper_t, actor_chain_t> propagator{
+        prop_cfg};
+    // Wall-clock one call (NOTE: assumes it blocks until the GPU is done)
+    std::chrono::high_resolution_clock::time_point t1 =
+        std::chrono::high_resolution_clock::now();
+    propagator(&dev_mr, &toy_det, &bfield, &single_sample, &actor_states);
+    std::chrono::high_resolution_clock::time_point t2 =
+        std::chrono::high_resolution_clock::now();
+
+    const auto time_span =
+        std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
+
+    std::cout << "It took me " << time_span << " for " << trk_cfg.n_tracks()
+              << " tracks" << std::endl;
+}
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt b/tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt
@@ -28,6 +28,8 @@ foreach(algebra ${algebra_plugins})
         STATIC
         "propagation_benchmark.hpp"
         "propagation_benchmark.cu"
+        "propagator.hpp"
+        "propagator.cu"
     )
 
     add_library(
@@ -43,4 +45,9 @@ foreach(algebra ${algebra_plugins})
             detray::test_common
             detray::core_${algebra}
     )
+    # Use the project-wide CMAKE_CUDA_ARCHITECTURES setting for this target.
+    set_property(
+        TARGET detray_benchmark_cuda_${algebra}
+        PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}
+    )
 endforeach()
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp
@@ -92,7 +92,7 @@ template <typename propagator_t>
 void release_actor_states(
     typename propagator_t::actor_chain_type::state_tuple *);
 
-/// Device Propagation becnhmark
+/// Device Propagation benchmark
 template <typename propagator_t, typename bfield_bknd_t,
           detray::benchmarks::propagation_opt kOPT =
               detray::benchmarks::propagation_opt::e_unsync>
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagator.cu b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagator.cu
diff --git a/tests/benchmarks/include/detray/benchmarks/device/cuda/propagator.hpp b/tests/benchmarks/include/detray/benchmarks/device/cuda/propagator.hpp
diff --git a/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp b/tests/benchmarks/include/detray/benchmarks/propagation_benchmark_utils.hpp