2323
2424// Vecmem include(s)
2525#include < vecmem/memory/cuda/device_memory_resource.hpp>
26+ #include < vecmem/memory/cuda/host_memory_resource.hpp>
2627#include < vecmem/memory/host_memory_resource.hpp>
2728
2829// System include(s)
@@ -49,13 +50,35 @@ int main(int argc, char** argv) {
4950 random_track_generator<free_track_parameters_t , uniform_gen_t >;
5051 using field_bknd_t = bfield::const_bknd_t <benchmarks::scalar>;
5152
52- vecmem::host_memory_resource host_mr;
53+ // vecmem::host_memory_resource host_mr;
54+ vecmem::cuda::host_memory_resource host_mr; // < pinned memory
5355 vecmem::cuda::device_memory_resource dev_mr;
5456
57+ // Device info
58+ /* int nDevices;
59+ cudaGetDeviceCount(&nDevices);
60+ for (int i = 0; i < nDevices; i++) {
61+ cudaDeviceProp prop;
62+ cudaGetDeviceProperties(&prop, i);
63+ std::cout << "Device Number: " << i << std::endl;
64+ std::cout << " Device name: " << prop.name<< std::endl;
65+ std::cout << " Memory Clock Rate (KHz): " <<
66+ prop.memoryClockRate<< std::endl;
67+ std::cout << " Memory Bus Width (bits): " <<
68+ prop.memoryBusWidth<< std::endl;
69+ std::cout << " Peak Memory Bandwidth (GB/s): " <<
70+ 2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6<< std::endl;
71+ }*/
72+
5573 //
5674 // Configuration
5775 //
5876
77+ std::size_t n_tracks{10000u };
78+ if (argc > 1 ) {
79+ n_tracks = static_cast <std::size_t >(atoi (argv[1 ]));
80+ }
81+
5982 // Constant magnetic field
6083 vector3 B{0 .f , 0 .f , 2 .f * unit<scalar>::T};
6184
@@ -76,13 +99,15 @@ int main(int argc, char** argv) {
7699 //
77100 // Generate track sample for strong scaling
78101 track_generator_t ::configuration trk_cfg{};
79- trk_cfg.n_tracks (10u );
102+ trk_cfg.n_tracks (n_tracks );
80103 trk_cfg.seed (detail::random_numbers<scalar>::default_seed ());
81104
105+ std::cout << trk_cfg << std::endl;
106+
82107 track_generator_t trk_gen{trk_cfg};
83108
84109 dvector<free_track_parameters_t > single_sample =
85- detray::benchmarks::generate_tracks (&host_mr, trk_gen);
110+ detray::benchmarks::generate_tracks (&host_mr, trk_gen, true );
86111
87112 const auto [toy_det, names] =
88113 build_toy_detector<algebra_t >(host_mr, toy_cfg);
@@ -96,7 +121,7 @@ int main(int argc, char** argv) {
96121 //
97122 // Register benchmarks
98123 //
99- std::cout << " ----------------------\n "
124+ std::cout << " \n ----------------------\n "
100125 << " Propagation Test\n "
101126 << " ----------------------\n\n " ;
102127
@@ -114,9 +139,12 @@ int main(int argc, char** argv) {
114139 std::chrono::high_resolution_clock::time_point t2 =
115140 std::chrono::high_resolution_clock::now ();
116141
117- const auto time_span =
118- std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
142+ const auto total_time =
143+ std::chrono::duration_cast<std::chrono::duration<double >>(t2 - t1);
144+ const double total_time_ms{total_time.count () * 1000 .};
119145
120- std::cout << " It took me " << time_span << " for " << trk_cfg.n_tracks ()
121- << " tracks" << std::endl;
146+ // Assumption: 1 event = 9000 tracks (3000 truth tracks, each with 2 additional seeds)
147+ std::cout << " It took: " << total_time_ms << " ms ("
148+ << total_time_ms / (static_cast <double >(n_tracks) / 9000 .)
149+ << " ms/evt)" << std::endl;
122150}
0 commit comments