Skip to content

Commit 2dd726a

Browse files
committed
Call stepper and navigation
1 parent 78c05ee commit 2dd726a

File tree

3 files changed

+476
-177
lines changed

3 files changed

+476
-177
lines changed

tests/benchmarks/cuda/propagation_new.cpp

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
// Vecmem include(s)
2525
#include <vecmem/memory/cuda/device_memory_resource.hpp>
26+
#include <vecmem/memory/cuda/host_memory_resource.hpp>
2627
#include <vecmem/memory/host_memory_resource.hpp>
2728

2829
// System include(s)
@@ -49,13 +50,35 @@ int main(int argc, char** argv) {
4950
random_track_generator<free_track_parameters_t, uniform_gen_t>;
5051
using field_bknd_t = bfield::const_bknd_t<benchmarks::scalar>;
5152

52-
vecmem::host_memory_resource host_mr;
53+
// vecmem::host_memory_resource host_mr;
54+
vecmem::cuda::host_memory_resource host_mr; //< pinned memory
5355
vecmem::cuda::device_memory_resource dev_mr;
5456

57+
// Device info
58+
/*int nDevices;
59+
cudaGetDeviceCount(&nDevices);
60+
for (int i = 0; i < nDevices; i++) {
61+
cudaDeviceProp prop;
62+
cudaGetDeviceProperties(&prop, i);
63+
std::cout << "Device Number: " << i << std::endl;
64+
std::cout << " Device name: " << prop.name<< std::endl;
65+
std::cout << " Memory Clock Rate (KHz): " <<
66+
prop.memoryClockRate<< std::endl;
67+
std::cout << " Memory Bus Width (bits): " <<
68+
prop.memoryBusWidth<< std::endl;
69+
std::cout << " Peak Memory Bandwidth (GB/s): " <<
70+
2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6<< std::endl;
71+
}*/
72+
5573
//
5674
// Configuration
5775
//
5876

77+
std::size_t n_tracks{10000u};
78+
if (argc > 1) {
79+
n_tracks = static_cast<std::size_t>(atoi(argv[1]));
80+
}
81+
5982
// Constant magnetic field
6083
vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};
6184

@@ -76,13 +99,15 @@ int main(int argc, char** argv) {
7699
//
77100
// Generate track sample for strong scaling
78101
track_generator_t::configuration trk_cfg{};
79-
trk_cfg.n_tracks(10u);
102+
trk_cfg.n_tracks(n_tracks);
80103
trk_cfg.seed(detail::random_numbers<scalar>::default_seed());
81104

105+
std::cout << trk_cfg << std::endl;
106+
82107
track_generator_t trk_gen{trk_cfg};
83108

84109
dvector<free_track_parameters_t> single_sample =
85-
detray::benchmarks::generate_tracks(&host_mr, trk_gen);
110+
detray::benchmarks::generate_tracks(&host_mr, trk_gen, true);
86111

87112
const auto [toy_det, names] =
88113
build_toy_detector<algebra_t>(host_mr, toy_cfg);
@@ -96,7 +121,7 @@ int main(int argc, char** argv) {
96121
//
97122
// Register benchmarks
98123
//
99-
std::cout << "----------------------\n"
124+
std::cout << "\n----------------------\n"
100125
<< "Propagation Test\n"
101126
<< "----------------------\n\n";
102127

@@ -114,9 +139,12 @@ int main(int argc, char** argv) {
114139
std::chrono::high_resolution_clock::time_point t2 =
115140
std::chrono::high_resolution_clock::now();
116141

117-
const auto time_span =
118-
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
142+
const auto total_time =
143+
std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
144+
const double total_time_ms{total_time.count() * 1000.};
119145

120-
std::cout << "It took me " << time_span << " for " << trk_cfg.n_tracks()
121-
<< " tracks" << std::endl;
146+
// Assumption: 1 event = 3000 truth tracks + 2 seeds per track, i.e. 3000 * (1 + 2) = 9000 tracks per event
147+
std::cout << "It took: " << total_time_ms << "ms ("
148+
<< total_time_ms / (static_cast<double>(n_tracks) / 9000.)
149+
<< " ms/evt)" << std::endl;
122150
}

0 commit comments

Comments
 (0)