Skip to content

Commit 2490295

Browse files
authored
Throughput Updates, main branch (2025.08.18.) (#1130)
* Removed central host memory caching from the throughput code, so that it is left up to the individual full-chain algorithms to handle their host memory as they wish. * Modified the use of host memory caching in the algorithms.
1 parent 7f60115 commit 2490295

18 files changed

+186
-208
lines changed

examples/run/alpaka/full_chain_algorithm.cpp

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,8 @@ full_chain_algorithm::full_chain_algorithm(
2929
m_queue(),
3030
m_vecmem_objects(m_queue),
3131
m_host_mr(host_mr),
32-
m_cached_device_mr(
33-
std::make_unique<::vecmem::binary_page_memory_resource>(
34-
m_vecmem_objects.device_mr())),
32+
m_cached_pinned_host_mr(m_vecmem_objects.host_mr()),
33+
m_cached_device_mr(m_vecmem_objects.device_mr()),
3534
m_field_vec{0.f, 0.f, finder_config.bFieldInZ},
3635
m_field(field),
3736
m_det_descr(det_descr),
@@ -40,29 +39,27 @@ full_chain_algorithm::full_chain_algorithm(
4039
m_det_descr.get().size()),
4140
m_vecmem_objects.device_mr()),
4241
m_detector(detector),
43-
m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr},
42+
m_clusterization({m_cached_device_mr, &m_cached_pinned_host_mr},
4443
m_vecmem_objects.async_copy(), m_queue,
4544
clustering_config),
46-
m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
45+
m_measurement_sorting({m_cached_device_mr, &m_cached_pinned_host_mr},
4746
m_vecmem_objects.async_copy(), m_queue,
4847
logger->cloneWithSuffix("MeasSortingAlg")),
49-
m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
48+
m_spacepoint_formation({m_cached_device_mr, &m_cached_pinned_host_mr},
5049
m_vecmem_objects.async_copy(), m_queue,
5150
logger->cloneWithSuffix("SpFormationAlg")),
5251
m_seeding(finder_config, grid_config, filter_config,
53-
memory_resource{*m_cached_device_mr, &m_host_mr},
52+
{m_cached_device_mr, &m_cached_pinned_host_mr},
5453
m_vecmem_objects.async_copy(), m_queue,
5554
logger->cloneWithSuffix("SeedingAlg")),
5655
m_track_parameter_estimation(
57-
memory_resource{*m_cached_device_mr, &m_host_mr},
56+
{m_cached_device_mr, &m_cached_pinned_host_mr},
5857
m_vecmem_objects.async_copy(), m_queue,
5958
logger->cloneWithSuffix("TrackParamEstAlg")),
60-
m_finding(finding_config,
61-
memory_resource{*m_cached_device_mr, &m_host_mr},
59+
m_finding(finding_config, {m_cached_device_mr, &m_cached_pinned_host_mr},
6260
m_vecmem_objects.async_copy(), m_queue,
6361
logger->cloneWithSuffix("TrackFindingAlg")),
64-
m_fitting(fitting_config,
65-
memory_resource{*m_cached_device_mr, &m_host_mr},
62+
m_fitting(fitting_config, {m_cached_device_mr, &m_cached_pinned_host_mr},
6663
m_vecmem_objects.async_copy(), m_queue,
6764
logger->cloneWithSuffix("TrackFittingAlg")),
6865
m_clustering_config(clustering_config),
@@ -92,9 +89,8 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
9289
m_queue(),
9390
m_vecmem_objects(m_queue),
9491
m_host_mr(parent.m_host_mr),
95-
m_cached_device_mr(
96-
std::make_unique<::vecmem::binary_page_memory_resource>(
97-
m_vecmem_objects.device_mr())),
92+
m_cached_pinned_host_mr(m_vecmem_objects.host_mr()),
93+
m_cached_device_mr(m_vecmem_objects.device_mr()),
9894
m_field_vec(parent.m_field_vec),
9995
m_field(parent.m_field),
10096
m_det_descr(parent.m_det_descr),
@@ -103,30 +99,30 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
10399
m_det_descr.get().size()),
104100
m_vecmem_objects.device_mr()),
105101
m_detector(parent.m_detector),
106-
m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr},
102+
m_clusterization({m_cached_device_mr, &m_cached_pinned_host_mr},
107103
m_vecmem_objects.async_copy(), m_queue,
108104
parent.m_clustering_config),
109-
m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
105+
m_measurement_sorting({m_cached_device_mr, &m_cached_pinned_host_mr},
110106
m_vecmem_objects.async_copy(), m_queue,
111107
parent.logger().cloneWithSuffix("MeasSortingAlg")),
112-
m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
108+
m_spacepoint_formation({m_cached_device_mr, &m_cached_pinned_host_mr},
113109
m_vecmem_objects.async_copy(), m_queue,
114110
parent.logger().cloneWithSuffix("SpFormationAlg")),
115111
m_seeding(parent.m_finder_config, parent.m_grid_config,
116112
parent.m_filter_config,
117-
memory_resource{*m_cached_device_mr, &m_host_mr},
113+
{m_cached_device_mr, &m_cached_pinned_host_mr},
118114
m_vecmem_objects.async_copy(), m_queue,
119115
parent.logger().cloneWithSuffix("SeedingAlg")),
120116
m_track_parameter_estimation(
121-
memory_resource{*m_cached_device_mr, &m_host_mr},
117+
{m_cached_device_mr, &m_cached_pinned_host_mr},
122118
m_vecmem_objects.async_copy(), m_queue,
123119
parent.logger().cloneWithSuffix("TrackParamEstAlg")),
124120
m_finding(parent.m_finding_config,
125-
memory_resource{*m_cached_device_mr, &m_host_mr},
121+
{m_cached_device_mr, &m_cached_pinned_host_mr},
126122
m_vecmem_objects.async_copy(), m_queue,
127123
parent.logger().cloneWithSuffix("TrackFindingAlg")),
128124
m_fitting(parent.m_fitting_config,
129-
memory_resource{*m_cached_device_mr, &m_host_mr},
125+
{m_cached_device_mr, &m_cached_pinned_host_mr},
130126
m_vecmem_objects.async_copy(), m_queue,
131127
parent.logger().cloneWithSuffix("TrackFittingAlg")),
132128
m_clustering_config(parent.m_clustering_config),
@@ -156,7 +152,7 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(
156152

157153
// Create device copy of input collections
158154
edm::silicon_cell_collection::buffer cells_buffer(
159-
static_cast<unsigned int>(cells.size()), *m_cached_device_mr);
155+
static_cast<unsigned int>(cells.size()), m_cached_device_mr);
160156
m_vecmem_objects.async_copy()(::vecmem::get_data(cells), cells_buffer)
161157
->ignore();
162158

@@ -184,8 +180,12 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(
184180
m_device_detector_view, m_field, {track_candidates, measurements});
185181

186182
// Copy a limited amount of result data back to the host.
183+
const auto host_tracks = m_vecmem_objects.async_copy().to(
184+
track_states.tracks, m_cached_pinned_host_mr, nullptr,
185+
::vecmem::copy::type::device_to_host);
187186
output_type result{m_host_mr};
188-
m_vecmem_objects.async_copy()(track_states.tracks, result)->wait();
187+
::vecmem::copy host_copy;
188+
host_copy(host_tracks, result)->wait();
189189
return result;
190190

191191
}
@@ -207,7 +207,7 @@ bound_track_parameters_collection_types::host full_chain_algorithm::seeding(
207207

208208
// Create device copy of input collections
209209
edm::silicon_cell_collection::buffer cells_buffer(
210-
static_cast<unsigned int>(cells.size()), *m_cached_device_mr);
210+
static_cast<unsigned int>(cells.size()), m_cached_device_mr);
211211
m_vecmem_objects.async_copy()(::vecmem::get_data(cells), cells_buffer)
212212
->ignore();
213213

@@ -227,8 +227,12 @@ bound_track_parameters_collection_types::host full_chain_algorithm::seeding(
227227
m_seeding(spacepoints), m_field_vec);
228228

229229
// Copy a limited amount of result data back to the host.
230+
const auto host_seeds = m_vecmem_objects.async_copy().to(
231+
track_params, m_cached_pinned_host_mr,
232+
::vecmem::copy::type::device_to_host);
230233
bound_track_parameters_collection_types::host result{&m_host_mr};
231-
m_vecmem_objects.async_copy()(track_params, result)->wait();
234+
::vecmem::copy host_copy;
235+
host_copy(host_seeds, result)->wait();
232236
return result;
233237

234238
}

examples/run/alpaka/full_chain_algorithm.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,10 @@ class full_chain_algorithm
125125

126126
/// Host memory resource
127127
::vecmem::memory_resource& m_host_mr;
128+
/// Cached pinned host memory resource
129+
mutable ::vecmem::binary_page_memory_resource m_cached_pinned_host_mr;
128130
/// Device caching memory resource
129-
std::unique_ptr<::vecmem::binary_page_memory_resource> m_cached_device_mr;
131+
mutable ::vecmem::binary_page_memory_resource m_cached_device_mr;
130132

131133
/// Constant B field for the (seed) track parameter estimation
132134
traccc::vector3 m_field_vec;

examples/run/alpaka/throughput_mt.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
int main(int argc, char* argv[]) {
1414

1515
// Execute the throughput test.
16-
static const bool use_host_caching = true;
17-
return traccc::throughput_mt<traccc::alpaka::full_chain_algorithm,
18-
vecmem::host_memory_resource>(
19-
"Multi-threaded Alpaka GPU throughput tests", argc, argv,
20-
use_host_caching);
16+
return traccc::throughput_mt<traccc::alpaka::full_chain_algorithm>(
17+
"Multi-threaded Alpaka GPU throughput tests", argc, argv);
2118
}

examples/run/alpaka/throughput_st.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
int main(int argc, char* argv[]) {
1414

1515
// Execute the throughput test.
16-
static const bool use_host_caching = true;
17-
return traccc::throughput_st<traccc::alpaka::full_chain_algorithm,
18-
vecmem::host_memory_resource>(
19-
"Single-threaded Alpaka GPU throughput tests", argc, argv,
20-
use_host_caching);
16+
return traccc::throughput_st<traccc::alpaka::full_chain_algorithm>(
17+
"Single-threaded Alpaka GPU throughput tests", argc, argv);
2118
}

examples/run/common/throughput_mt.hpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2022 CERN for the benefit of the ACTS project
3+
* (c) 2022-2025 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
77

88
#pragma once
99

10-
// VecMem include(s).
11-
#include <vecmem/memory/host_memory_resource.hpp>
12-
1310
// System include(s).
1411
#include <string_view>
1512

@@ -18,18 +15,15 @@ namespace traccc {
1815
/// Helper function running a multi-threaded throughput test
1916
///
2017
/// @tparam FULL_CHAIN_ALG The type of the full chain algorithm to use
21-
/// @tparam HOST_MR The host memory resource type to use
18+
///
2219
/// @param description A short description of the application
2320
/// @param argc The count of command line arguments (from @c main(...))
2421
/// @param argv The command line arguments (from @c main(...))
25-
/// @param use_host_caching Flag specifying whether host-side memory caching
26-
/// should be used
22+
///
2723
/// @return The value to be returned from @c main(...)
2824
///
29-
template <typename FULL_CHAIN_ALG,
30-
typename HOST_MR = vecmem::host_memory_resource>
31-
int throughput_mt(std::string_view description, int argc, char* argv[],
32-
bool use_host_caching = false);
25+
template <typename FULL_CHAIN_ALG>
26+
int throughput_mt(std::string_view description, int argc, char* argv[]);
3327

3428
} // namespace traccc
3529

examples/run/common/throughput_mt.ipp

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
#include "traccc/performance/timing_info.hpp"
3939

4040
// VecMem include(s).
41-
#include <vecmem/memory/binary_page_memory_resource.hpp>
41+
#include <vecmem/memory/host_memory_resource.hpp>
4242

4343
// TBB include(s).
4444
#include <tbb/global_control.h>
@@ -61,9 +61,9 @@
6161

6262
namespace traccc {
6363

64-
template <typename FULL_CHAIN_ALG, typename HOST_MR>
65-
int throughput_mt(std::string_view description, int argc, char* argv[],
66-
bool use_host_caching) {
64+
template <typename FULL_CHAIN_ALG>
65+
int throughput_mt(std::string_view description, int argc, char* argv[]) {
66+
6767
std::unique_ptr<const traccc::Logger> ilogger = traccc::getDefaultLogger(
6868
"ThroughputExample", traccc::Logging::Level::INFO);
6969
TRACCC_LOCAL_LOGGER(std::move(ilogger));
@@ -92,36 +92,36 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
9292
performance::timing_info times;
9393

9494
// Memory resource to use in the test.
95-
HOST_MR uncached_host_mr;
95+
vecmem::host_memory_resource host_mr;
9696

9797
// Construct the detector description object.
98-
traccc::silicon_detector_description::host det_descr{uncached_host_mr};
98+
traccc::silicon_detector_description::host det_descr{host_mr};
9999
traccc::io::read_detector_description(
100100
det_descr, detector_opts.detector_file, detector_opts.digitization_file,
101101
(detector_opts.use_detray_detector ? traccc::data_format::json
102102
: traccc::data_format::csv));
103103

104104
// Construct a Detray detector object, if supported by the configuration.
105-
traccc::default_detector::host detector{uncached_host_mr};
105+
traccc::default_detector::host detector{host_mr};
106106
if (detector_opts.use_detray_detector) {
107107
traccc::io::read_detector(
108-
detector, uncached_host_mr, detector_opts.detector_file,
108+
detector, host_mr, detector_opts.detector_file,
109109
detector_opts.material_file, detector_opts.grid_file);
110110
}
111111

112112
// Construct the magnetic field object.
113113
const auto field = details::make_magnetic_field(bfield_opts);
114114

115115
// Read in all input events into memory.
116-
vecmem::vector<edm::silicon_cell_collection::host> input{&uncached_host_mr};
116+
vecmem::vector<edm::silicon_cell_collection::host> input{&host_mr};
117117
{
118118
performance::timer t{"File reading", times};
119119
// Set up the container for the input events.
120120
input.reserve(input_opts.events);
121121
const std::size_t first_event = input_opts.skip;
122122
const std::size_t last_event = input_opts.skip + input_opts.events;
123123
for (std::size_t i = first_event; i < last_event; ++i) {
124-
input.emplace_back(uncached_host_mr);
124+
input.emplace_back(host_mr);
125125
}
126126
// Read the input cells into memory in parallel.
127127
tbb::parallel_for(
@@ -138,19 +138,6 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
138138
});
139139
}
140140

141-
// Set up cached memory resources on top of the host memory resource
142-
// separately for each CPU thread.
143-
std::vector<std::unique_ptr<vecmem::binary_page_memory_resource> >
144-
cached_host_mrs;
145-
if (use_host_caching) {
146-
cached_host_mrs.reserve(threading_opts.threads + 1);
147-
for (std::size_t i = 0; i < threading_opts.threads + 1; ++i) {
148-
cached_host_mrs.push_back(
149-
std::make_unique<vecmem::binary_page_memory_resource>(
150-
uncached_host_mr));
151-
}
152-
}
153-
154141
// Algorithm configuration(s).
155142
typename FULL_CHAIN_ALG::clustering_algorithm::config_type clustering_cfg(
156143
clusterization_opts);
@@ -170,16 +157,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
170157
std::vector<FULL_CHAIN_ALG> algs;
171158
algs.reserve(threading_opts.threads + 1);
172159
for (std::size_t i = 0; i < threading_opts.threads + 1; ++i) {
173-
174-
vecmem::memory_resource& alg_host_mr =
175-
use_host_caching
176-
? static_cast<vecmem::memory_resource&>(
177-
*(cached_host_mrs.at(i)))
178-
: static_cast<vecmem::memory_resource&>(uncached_host_mr);
179160
algs.push_back(
180-
{alg_host_mr, clustering_cfg, seedfinder_config,
181-
spacepoint_grid_config, seedfilter_config, finding_cfg,
182-
fitting_cfg, det_descr, field,
161+
{host_mr, clustering_cfg, seedfinder_config, spacepoint_grid_config,
162+
seedfilter_config, finding_cfg, fitting_cfg, det_descr, field,
183163
(detector_opts.use_detray_detector ? &detector : nullptr),
184164
logger().clone()});
185165
}
@@ -304,10 +284,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
304284
group.wait();
305285
}
306286

307-
// Delete the algorithms and host memory caches explicitly before their
308-
// parent object would go out of scope.
287+
// Delete the algorithms explicitly before their parent object would go out
288+
// of scope.
309289
algs.clear();
310-
cached_host_mrs.clear();
311290

312291
// Print some results.
313292
TRACCC_INFO("Reconstructed track parameters: " << rec_track_params.load());

examples/run/common/throughput_st.hpp

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2022 CERN for the benefit of the ACTS project
3+
* (c) 2022-2025 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
77

88
#pragma once
99

10-
// Projection include(s).
11-
#include "traccc/seeding/detail/seeding_config.hpp"
12-
13-
// VecMem include(s).
14-
#include <vecmem/memory/host_memory_resource.hpp>
15-
1610
// System include(s).
1711
#include <string_view>
1812

@@ -21,18 +15,15 @@ namespace traccc {
2115
/// Helper function running a single-threaded throughput test
2216
///
2317
/// @tparam FULL_CHAIN_ALG The type of the full chain algorithm to use
24-
/// @tparam HOST_MR The host memory resource type to use
18+
///
2519
/// @param description A short description of the application
2620
/// @param argc The count of command line arguments (from @c main(...))
2721
/// @param argv The command line arguments (from @c main(...))
28-
/// @param use_host_caching Flag specifying whether host-side memory caching
29-
/// should be used
22+
///
3023
/// @return The value to be returned from @c main(...)
3124
///
32-
template <typename FULL_CHAIN_ALG,
33-
typename HOST_MR = vecmem::host_memory_resource>
34-
int throughput_st(std::string_view description, int argc, char* argv[],
35-
bool use_host_caching = false);
25+
template <typename FULL_CHAIN_ALG>
26+
int throughput_st(std::string_view description, int argc, char* argv[]);
3627

3728
} // namespace traccc
3829

0 commit comments

Comments
 (0)