Skip to content

Commit 78c05ee

Browse files
committed
Producer consumer impl
1 parent bed3c09 commit 78c05ee

File tree

10 files changed

+800
-9
lines changed

10 files changed

+800
-9
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ if(${CMAKE_CXX_STANDARD} LESS 20)
2727
)
2828
endif()
2929

30+
# Set the CUDA architecture to build code for.
31+
set(CMAKE_CUDA_ARCHITECTURES
32+
75
33+
CACHE STRING
34+
"CUDA architectures to build device code for"
35+
)
36+
3037
# CMake include(s).
3138
include(CMakeDependentOption)
3239
include(GNUInstallDirs)

cmake/detray-compiler-options-cuda.cmake

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,6 @@ if(PROJECT_IS_TOP_LEVEL)
1919
detray_add_flag( CMAKE_CUDA_FLAGS "-Xcompiler /Zc:__cplusplus" )
2020
endif()
2121

22-
# Set the CUDA architecture to build code for.
23-
set(CMAKE_CUDA_ARCHITECTURES
24-
"52"
25-
CACHE STRING
26-
"CUDA architectures to build device code for"
27-
)
28-
2922
if("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
3023
# Allow to use functions in device code that are constexpr, even if they are
3124
# not marked with __device__.

core/include/detray/propagator/rk_stepper.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class rk_stepper final
161161
scalar_type m_next_step_size{0.f};
162162

163163
/// Magnetic field view
164-
const magnetic_field_t m_magnetic_field;
164+
magnetic_field_t m_magnetic_field;
165165
};
166166

167167
/// Take a step, using an adaptive Runge-Kutta algorithm.

tests/benchmarks/cuda/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,13 @@ foreach(algebra ${algebra_plugins})
3838
PRIVATE "-march=native" "-ftree-vectorize"
3939
)
4040
endforeach()
41+
42+
detray_add_executable(cuda_propagation
43+
"propagation_new.cpp"
44+
LINK_LIBRARIES detray::benchmark_cuda_array detray::core_array vecmem::cuda detray::test_common
45+
)
46+
47+
target_compile_options(
48+
detray_cuda_propagation
49+
PRIVATE "-march=native" "-ftree-vectorize"
50+
)
tests/benchmarks/cuda/propagation_new.cpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/** Detray library, part of the ACTS project (R&D line)
2+
*
3+
* (c) 2024 CERN for the benefit of the ACTS project
4+
*
5+
* Mozilla Public License Version 2.0
6+
*/
7+
8+
// Project include(s)
9+
#include "detray/navigation/navigator.hpp"
10+
#include "detray/propagator/actors.hpp"
11+
#include "detray/propagator/rk_stepper.hpp"
12+
#include "detray/tracks/tracks.hpp"
13+
14+
// Detray benchmark include(s)
15+
#include "detray/benchmarks/device/cuda/propagator.hpp"
16+
#include "detray/benchmarks/propagation_benchmark_utils.hpp"
17+
#include "detray/benchmarks/types.hpp"
18+
19+
// Detray test include(s)
20+
#include "detray/test/common/bfield.hpp"
21+
#include "detray/test/common/build_toy_detector.hpp"
22+
#include "detray/test/common/track_generators.hpp"
23+
24+
// Vecmem include(s)
25+
#include <vecmem/memory/cuda/device_memory_resource.hpp>
26+
#include <vecmem/memory/host_memory_resource.hpp>
27+
28+
// System include(s)
29+
#include <chrono>
30+
#include <ctime>
31+
#include <iostream>
32+
#include <ratio>
33+
#include <string>
34+
35+
using namespace detray;
36+
37+
int main(int argc, char** argv) {
38+
39+
using metadata_t = benchmarks::toy_metadata;
40+
using toy_detector_t = detector<metadata_t>;
41+
using algebra_t = typename toy_detector_t::algebra_type;
42+
using scalar = dscalar<algebra_t>;
43+
using vector3 = dvector3D<algebra_t>;
44+
45+
using free_track_parameters_t = free_track_parameters<algebra_t>;
46+
using uniform_gen_t =
47+
detail::random_numbers<scalar, std::uniform_real_distribution<scalar>>;
48+
using track_generator_t =
49+
random_track_generator<free_track_parameters_t, uniform_gen_t>;
50+
using field_bknd_t = bfield::const_bknd_t<benchmarks::scalar>;
51+
52+
vecmem::host_memory_resource host_mr;
53+
vecmem::cuda::device_memory_resource dev_mr;
54+
55+
//
56+
// Configuration
57+
//
58+
59+
// Constant magnetic field
60+
vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
61+
62+
// Configure toy detector
63+
toy_det_config<scalar> toy_cfg{};
64+
toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u);
65+
66+
std::cout << toy_cfg << std::endl;
67+
68+
// Configure propagation
69+
propagation::config prop_cfg{};
70+
prop_cfg.navigation.search_window = {3u, 3u};
71+
72+
std::cout << prop_cfg << std::endl;
73+
74+
//
75+
// Prepare data
76+
//
77+
// Generate track sample for strong scaling
78+
track_generator_t::configuration trk_cfg{};
79+
trk_cfg.n_tracks(10u);
80+
trk_cfg.seed(detail::random_numbers<scalar>::default_seed());
81+
82+
track_generator_t trk_gen{trk_cfg};
83+
84+
dvector<free_track_parameters_t> single_sample =
85+
detray::benchmarks::generate_tracks(&host_mr, trk_gen);
86+
87+
const auto [toy_det, names] =
88+
build_toy_detector<algebra_t>(host_mr, toy_cfg);
89+
90+
auto bfield = create_const_field<scalar>(B);
91+
92+
pointwise_material_interactor<algebra_t>::state interactor_state{};
93+
94+
auto actor_states = detail::make_tuple<dtuple>(interactor_state);
95+
96+
//
97+
// Register benchmarks
98+
//
99+
std::cout << "----------------------\n"
100+
<< "Propagation Test\n"
101+
<< "----------------------\n\n";
102+
103+
using navigator_t = navigator_type<metadata_t>;
104+
using stepper_t = stepper_type<metadata_t, field_bknd_t>;
105+
using actor_chain_t = default_chain<algebra_t>;
106+
107+
prop_cfg.stepping.do_covariance_transport = true;
108+
cuda_propagation<navigator_t, stepper_t, actor_chain_t> propagator{
109+
prop_cfg};
110+
111+
std::chrono::high_resolution_clock::time_point t1 =
112+
std::chrono::high_resolution_clock::now();
113+
propagator(&dev_mr, &toy_det, &bfield, &single_sample, &actor_states);
114+
std::chrono::high_resolution_clock::time_point t2 =
115+
std::chrono::high_resolution_clock::now();
116+
117+
const auto time_span =
118+
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
119+
120+
std::cout << "It took me " << time_span << " for " << trk_cfg.n_tracks()
121+
<< " tracks" << std::endl;
122+
}

tests/benchmarks/include/detray/benchmarks/device/cuda/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ foreach(algebra ${algebra_plugins})
2828
STATIC
2929
"propagation_benchmark.hpp"
3030
"propagation_benchmark.cu"
31+
"propagator.hpp"
32+
"propagator.cu"
3133
)
3234

3335
add_library(
@@ -43,4 +45,9 @@ foreach(algebra ${algebra_plugins})
4345
detray::test_common
4446
detray::core_${algebra}
4547
)
48+
49+
set_property(
50+
TARGET detray_benchmark_cuda_${algebra}
51+
PROPERTY CUDA_ARCHITECTURES 75
52+
)
4653
endforeach()

tests/benchmarks/include/detray/benchmarks/device/cuda/propagation_benchmark.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ template <typename propagator_t>
9292
void release_actor_states(
9393
typename propagator_t::actor_chain_type::state_tuple *);
9494

95-
/// Device Propagation becnhmark
95+
/// Device Propagation benchmark
9696
template <typename propagator_t, typename bfield_bknd_t,
9797
detray::benchmarks::propagation_opt kOPT =
9898
detray::benchmarks::propagation_opt::e_unsync>

0 commit comments

Comments
 (0)