Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 30 additions & 26 deletions examples/run/alpaka/full_chain_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ full_chain_algorithm::full_chain_algorithm(
m_queue(),
m_vecmem_objects(m_queue),
m_host_mr(host_mr),
m_cached_device_mr(
std::make_unique<::vecmem::binary_page_memory_resource>(
m_vecmem_objects.device_mr())),
m_cached_pinned_host_mr(m_vecmem_objects.host_mr()),
m_cached_device_mr(m_vecmem_objects.device_mr()),
m_field_vec{0.f, 0.f, finder_config.bFieldInZ},
m_field(field),
m_det_descr(det_descr),
Expand All @@ -40,29 +39,27 @@ full_chain_algorithm::full_chain_algorithm(
m_det_descr.get().size()),
m_vecmem_objects.device_mr()),
m_detector(detector),
m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr},
m_clusterization({m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
clustering_config),
m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
m_measurement_sorting({m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
logger->cloneWithSuffix("MeasSortingAlg")),
m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
m_spacepoint_formation({m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
logger->cloneWithSuffix("SpFormationAlg")),
m_seeding(finder_config, grid_config, filter_config,
memory_resource{*m_cached_device_mr, &m_host_mr},
{m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
logger->cloneWithSuffix("SeedingAlg")),
m_track_parameter_estimation(
memory_resource{*m_cached_device_mr, &m_host_mr},
{m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
logger->cloneWithSuffix("TrackParamEstAlg")),
m_finding(finding_config,
memory_resource{*m_cached_device_mr, &m_host_mr},
m_finding(finding_config, {m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
logger->cloneWithSuffix("TrackFindingAlg")),
m_fitting(fitting_config,
memory_resource{*m_cached_device_mr, &m_host_mr},
m_fitting(fitting_config, {m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
logger->cloneWithSuffix("TrackFittingAlg")),
m_clustering_config(clustering_config),
Expand Down Expand Up @@ -92,9 +89,8 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
m_queue(),
m_vecmem_objects(m_queue),
m_host_mr(parent.m_host_mr),
m_cached_device_mr(
std::make_unique<::vecmem::binary_page_memory_resource>(
m_vecmem_objects.device_mr())),
m_cached_pinned_host_mr(m_vecmem_objects.host_mr()),
m_cached_device_mr(m_vecmem_objects.device_mr()),
m_field_vec(parent.m_field_vec),
m_field(parent.m_field),
m_det_descr(parent.m_det_descr),
Expand All @@ -103,30 +99,30 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
m_det_descr.get().size()),
m_vecmem_objects.device_mr()),
m_detector(parent.m_detector),
m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr},
m_clusterization({m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.m_clustering_config),
m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
m_measurement_sorting({m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.logger().cloneWithSuffix("MeasSortingAlg")),
m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
m_spacepoint_formation({m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.logger().cloneWithSuffix("SpFormationAlg")),
m_seeding(parent.m_finder_config, parent.m_grid_config,
parent.m_filter_config,
memory_resource{*m_cached_device_mr, &m_host_mr},
{m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.logger().cloneWithSuffix("SeedingAlg")),
m_track_parameter_estimation(
memory_resource{*m_cached_device_mr, &m_host_mr},
{m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.logger().cloneWithSuffix("TrackParamEstAlg")),
m_finding(parent.m_finding_config,
memory_resource{*m_cached_device_mr, &m_host_mr},
{m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.logger().cloneWithSuffix("TrackFindingAlg")),
m_fitting(parent.m_fitting_config,
memory_resource{*m_cached_device_mr, &m_host_mr},
{m_cached_device_mr, &m_cached_pinned_host_mr},
m_vecmem_objects.async_copy(), m_queue,
parent.logger().cloneWithSuffix("TrackFittingAlg")),
m_clustering_config(parent.m_clustering_config),
Expand Down Expand Up @@ -156,7 +152,7 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(

// Create device copy of input collections
edm::silicon_cell_collection::buffer cells_buffer(
static_cast<unsigned int>(cells.size()), *m_cached_device_mr);
static_cast<unsigned int>(cells.size()), m_cached_device_mr);
m_vecmem_objects.async_copy()(::vecmem::get_data(cells), cells_buffer)
->ignore();

Expand Down Expand Up @@ -184,8 +180,12 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(
m_device_detector_view, m_field, {track_candidates, measurements});

// Copy a limited amount of result data back to the host.
const auto host_tracks = m_vecmem_objects.async_copy().to(
track_states.tracks, m_cached_pinned_host_mr, nullptr,
::vecmem::copy::type::device_to_host);
output_type result{m_host_mr};
m_vecmem_objects.async_copy()(track_states.tracks, result)->wait();
::vecmem::copy host_copy;
host_copy(host_tracks, result)->wait();
return result;

}
Expand All @@ -207,7 +207,7 @@ bound_track_parameters_collection_types::host full_chain_algorithm::seeding(

// Create device copy of input collections
edm::silicon_cell_collection::buffer cells_buffer(
static_cast<unsigned int>(cells.size()), *m_cached_device_mr);
static_cast<unsigned int>(cells.size()), m_cached_device_mr);
m_vecmem_objects.async_copy()(::vecmem::get_data(cells), cells_buffer)
->ignore();

Expand All @@ -227,8 +227,12 @@ bound_track_parameters_collection_types::host full_chain_algorithm::seeding(
m_seeding(spacepoints), m_field_vec);

// Copy a limited amount of result data back to the host.
const auto host_seeds = m_vecmem_objects.async_copy().to(
track_params, m_cached_pinned_host_mr,
::vecmem::copy::type::device_to_host);
bound_track_parameters_collection_types::host result{&m_host_mr};
m_vecmem_objects.async_copy()(track_params, result)->wait();
::vecmem::copy host_copy;
host_copy(host_seeds, result)->wait();
return result;

}
Expand Down
4 changes: 3 additions & 1 deletion examples/run/alpaka/full_chain_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ class full_chain_algorithm

/// Host memory resource
::vecmem::memory_resource& m_host_mr;
/// Cached pinned host memory resource
mutable ::vecmem::binary_page_memory_resource m_cached_pinned_host_mr;
/// Device caching memory resource
std::unique_ptr<::vecmem::binary_page_memory_resource> m_cached_device_mr;
mutable ::vecmem::binary_page_memory_resource m_cached_device_mr;

/// Constant B field for the (seed) track parameter estimation
traccc::vector3 m_field_vec;
Expand Down
7 changes: 2 additions & 5 deletions examples/run/alpaka/throughput_mt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
int main(int argc, char* argv[]) {

// Execute the throughput test.
static const bool use_host_caching = true;
return traccc::throughput_mt<traccc::alpaka::full_chain_algorithm,
vecmem::host_memory_resource>(
"Multi-threaded Alpaka GPU throughput tests", argc, argv,
use_host_caching);
return traccc::throughput_mt<traccc::alpaka::full_chain_algorithm>(
"Multi-threaded Alpaka GPU throughput tests", argc, argv);
}
7 changes: 2 additions & 5 deletions examples/run/alpaka/throughput_st.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
int main(int argc, char* argv[]) {

// Execute the throughput test.
static const bool use_host_caching = true;
return traccc::throughput_st<traccc::alpaka::full_chain_algorithm,
vecmem::host_memory_resource>(
"Single-threaded Alpaka GPU throughput tests", argc, argv,
use_host_caching);
return traccc::throughput_st<traccc::alpaka::full_chain_algorithm>(
"Single-threaded Alpaka GPU throughput tests", argc, argv);
}
16 changes: 5 additions & 11 deletions examples/run/common/throughput_mt.hpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2022 CERN for the benefit of the ACTS project
* (c) 2022-2025 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

// VecMem include(s).
#include <vecmem/memory/host_memory_resource.hpp>

// System include(s).
#include <string_view>

Expand All @@ -18,18 +15,15 @@ namespace traccc {
/// Helper function running a multi-threaded throughput test
///
/// @tparam FULL_CHAIN_ALG The type of the full chain algorithm to use
/// @tparam HOST_MR The host memory resource type to use
///
/// @param description A short description of the application
/// @param argc The count of command line arguments (from @c main(...))
/// @param argv The command line arguments (from @c main(...))
/// @param use_host_caching Flag specifying whether host-side memory caching
/// should be used
///
/// @return The value to be returned from @c main(...)
///
template <typename FULL_CHAIN_ALG,
typename HOST_MR = vecmem::host_memory_resource>
int throughput_mt(std::string_view description, int argc, char* argv[],
bool use_host_caching = false);
template <typename FULL_CHAIN_ALG>
int throughput_mt(std::string_view description, int argc, char* argv[]);

} // namespace traccc

Expand Down
49 changes: 14 additions & 35 deletions examples/run/common/throughput_mt.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#include "traccc/performance/timing_info.hpp"

// VecMem include(s).
#include <vecmem/memory/binary_page_memory_resource.hpp>
#include <vecmem/memory/host_memory_resource.hpp>

// TBB include(s).
#include <tbb/global_control.h>
Expand All @@ -61,9 +61,9 @@

namespace traccc {

template <typename FULL_CHAIN_ALG, typename HOST_MR>
int throughput_mt(std::string_view description, int argc, char* argv[],
bool use_host_caching) {
template <typename FULL_CHAIN_ALG>
int throughput_mt(std::string_view description, int argc, char* argv[]) {

std::unique_ptr<const traccc::Logger> ilogger = traccc::getDefaultLogger(
"ThroughputExample", traccc::Logging::Level::INFO);
TRACCC_LOCAL_LOGGER(std::move(ilogger));
Expand Down Expand Up @@ -92,36 +92,36 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
performance::timing_info times;

// Memory resource to use in the test.
HOST_MR uncached_host_mr;
vecmem::host_memory_resource host_mr;

// Construct the detector description object.
traccc::silicon_detector_description::host det_descr{uncached_host_mr};
traccc::silicon_detector_description::host det_descr{host_mr};
traccc::io::read_detector_description(
det_descr, detector_opts.detector_file, detector_opts.digitization_file,
(detector_opts.use_detray_detector ? traccc::data_format::json
: traccc::data_format::csv));

// Construct a Detray detector object, if supported by the configuration.
traccc::default_detector::host detector{uncached_host_mr};
traccc::default_detector::host detector{host_mr};
if (detector_opts.use_detray_detector) {
traccc::io::read_detector(
detector, uncached_host_mr, detector_opts.detector_file,
detector, host_mr, detector_opts.detector_file,
detector_opts.material_file, detector_opts.grid_file);
}

// Construct the magnetic field object.
const auto field = details::make_magnetic_field(bfield_opts);

// Read in all input events into memory.
vecmem::vector<edm::silicon_cell_collection::host> input{&uncached_host_mr};
vecmem::vector<edm::silicon_cell_collection::host> input{&host_mr};
{
performance::timer t{"File reading", times};
// Set up the container for the input events.
input.reserve(input_opts.events);
const std::size_t first_event = input_opts.skip;
const std::size_t last_event = input_opts.skip + input_opts.events;
for (std::size_t i = first_event; i < last_event; ++i) {
input.emplace_back(uncached_host_mr);
input.emplace_back(host_mr);
}
// Read the input cells into memory in parallel.
tbb::parallel_for(
Expand All @@ -138,19 +138,6 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
});
}

// Set up cached memory resources on top of the host memory resource
// separately for each CPU thread.
std::vector<std::unique_ptr<vecmem::binary_page_memory_resource> >
cached_host_mrs;
if (use_host_caching) {
cached_host_mrs.reserve(threading_opts.threads + 1);
for (std::size_t i = 0; i < threading_opts.threads + 1; ++i) {
cached_host_mrs.push_back(
std::make_unique<vecmem::binary_page_memory_resource>(
uncached_host_mr));
}
}

// Algorithm configuration(s).
typename FULL_CHAIN_ALG::clustering_algorithm::config_type clustering_cfg(
clusterization_opts);
Expand All @@ -170,16 +157,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
std::vector<FULL_CHAIN_ALG> algs;
algs.reserve(threading_opts.threads + 1);
for (std::size_t i = 0; i < threading_opts.threads + 1; ++i) {

vecmem::memory_resource& alg_host_mr =
use_host_caching
? static_cast<vecmem::memory_resource&>(
*(cached_host_mrs.at(i)))
: static_cast<vecmem::memory_resource&>(uncached_host_mr);
algs.push_back(
{alg_host_mr, clustering_cfg, seedfinder_config,
spacepoint_grid_config, seedfilter_config, finding_cfg,
fitting_cfg, det_descr, field,
{host_mr, clustering_cfg, seedfinder_config, spacepoint_grid_config,
seedfilter_config, finding_cfg, fitting_cfg, det_descr, field,
(detector_opts.use_detray_detector ? &detector : nullptr),
logger().clone()});
}
Expand Down Expand Up @@ -304,10 +284,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[],
group.wait();
}

// Delete the algorithms and host memory caches explicitly before their
// parent object would go out of scope.
// Delete the algorithms explicitly before their parent object would go out
// of scope.
algs.clear();
cached_host_mrs.clear();

// Print some results.
TRACCC_INFO("Reconstructed track parameters: " << rec_track_params.load());
Expand Down
19 changes: 5 additions & 14 deletions examples/run/common/throughput_st.hpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2022 CERN for the benefit of the ACTS project
* (c) 2022-2025 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

// Projection include(s).
#include "traccc/seeding/detail/seeding_config.hpp"

// VecMem include(s).
#include <vecmem/memory/host_memory_resource.hpp>

// System include(s).
#include <string_view>

Expand All @@ -21,18 +15,15 @@ namespace traccc {
/// Helper function running a single-threaded throughput test
///
/// @tparam FULL_CHAIN_ALG The type of the full chain algorithm to use
/// @tparam HOST_MR The host memory resource type to use
///
/// @param description A short description of the application
/// @param argc The count of command line arguments (from @c main(...))
/// @param argv The command line arguments (from @c main(...))
/// @param use_host_caching Flag specifying whether host-side memory caching
/// should be used
///
/// @return The value to be returned from @c main(...)
///
template <typename FULL_CHAIN_ALG,
typename HOST_MR = vecmem::host_memory_resource>
int throughput_st(std::string_view description, int argc, char* argv[],
bool use_host_caching = false);
template <typename FULL_CHAIN_ALG>
int throughput_st(std::string_view description, int argc, char* argv[]);

} // namespace traccc

Expand Down
Loading
Loading