Skip to content

Commit 9dff675

Browse files
authored
Merge pull request #1037 from stephenswat/perf/cuda_launch_config
Update CUDA propagation and fit kernel launch
2 parents e57a5d6 + 7f97695 commit 9dff675

File tree

5 files changed

+7
-7
lines changed

5 files changed

+7
-7
lines changed

device/cuda/src/finding/combinatorial_kalman_filter.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ combinatorial_kalman_filter(
369369
.tips_view = tips_buffer,
370370
.tip_lengths_view = tip_length_buffer};
371371

372-
const unsigned int nThreads = warp_size * 2;
372+
const unsigned int nThreads = warp_size * 4;
373373
const unsigned int nBlocks =
374374
(n_candidates + nThreads - 1) / nThreads;
375375
propagate_to_next_surface<

device/cuda/src/finding/kernels/specializations/propagate_to_next_surface_src.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace traccc::cuda {
1818
namespace kernels {
1919

2020
template <typename propagator_t, typename bfield_t>
21-
__global__ void propagate_to_next_surface(
21+
__global__ __launch_bounds__(128, 8) void propagate_to_next_surface(
2222
const finding_config cfg,
2323
device::propagate_to_next_surface_payload<propagator_t, bfield_t> payload) {
2424

device/cuda/src/fitting/kalman_fitting.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ track_state_container_types::buffer kalman_fitting(
109109
param_liveness_setup_event->ignore();
110110

111111
// Launch parameters for all the kernels.
112-
const unsigned int nThreads = warp_size * 2;
112+
const unsigned int nThreads = warp_size * 4;
113113
const unsigned int nBlocks = (n_tracks + nThreads - 1) / nThreads;
114114

115115
// Fill the keys and param_ids buffers.

device/cuda/src/fitting/kernels/specializations/fit_backward_src.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
namespace traccc::cuda {
1515
namespace kernels {
1616
template <typename fitter_t>
17-
__global__ void fit_backward(const fitting_config cfg,
18-
const device::fit_payload<fitter_t> payload) {
17+
__global__ __launch_bounds__(128, 8) void fit_backward(
18+
const fitting_config cfg, const device::fit_payload<fitter_t> payload) {
1919
device::fit_backward<fitter_t>(details::global_index1(), cfg, payload);
2020
}
2121
} // namespace kernels

device/cuda/src/fitting/kernels/specializations/fit_forward_src.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
namespace traccc::cuda {
1515
namespace kernels {
1616
template <typename fitter_t>
17-
__global__ void fit_forward(const fitting_config cfg,
18-
const device::fit_payload<fitter_t> payload) {
17+
__global__ __launch_bounds__(128, 8) void fit_forward(
18+
const fitting_config cfg, const device::fit_payload<fitter_t> payload) {
1919
device::fit_forward<fitter_t>(details::global_index1(), cfg, payload);
2020
}
2121
} // namespace kernels

0 commit comments

Comments
 (0)