Skip to content

Commit 30b2c9b

Browse files
authored
Adjust propagation kernel launch bounds (#1131)
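This commit modifies the launch bounds of our propagation and fitting kernels, removing the minimum number of blocks per SM (the second `__launch_bounds__` argument, previously 8) while keeping the maximum of 128 threads per block. This helps performance because the compiler no longer has to restrict per-thread register usage to fit that many resident blocks on each SM, so the kernels can use more registers. The parallelism of these kernels is currently bound by local memory anyway.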
1 parent f48f986 commit 30b2c9b

File tree

3 files changed

+3
-3
lines changed

3 files changed

+3
-3
lines changed

device/cuda/src/finding/kernels/specializations/propagate_to_next_surface_src.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ namespace traccc::cuda {
1818
namespace kernels {
1919

2020
template <typename propagator_t, typename bfield_t>
21-
__global__ __launch_bounds__(128, 8) void propagate_to_next_surface(
21+
__global__ __launch_bounds__(128) void propagate_to_next_surface(
2222
const finding_config cfg,
2323
device::propagate_to_next_surface_payload<propagator_t, bfield_t> payload) {
2424

device/cuda/src/fitting/kernels/specializations/fit_backward_src.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
namespace traccc::cuda {
1515
namespace kernels {
1616
template <typename fitter_t>
17-
__global__ __launch_bounds__(128, 8) void fit_backward(
17+
__global__ __launch_bounds__(128) void fit_backward(
1818
const fitting_config cfg, const device::fit_payload<fitter_t> payload) {
1919
device::fit_backward<fitter_t>(details::global_index1(), cfg, payload);
2020
}

device/cuda/src/fitting/kernels/specializations/fit_forward_src.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
namespace traccc::cuda {
1515
namespace kernels {
1616
template <typename fitter_t>
17-
__global__ __launch_bounds__(128, 8) void fit_forward(
17+
__global__ __launch_bounds__(128) void fit_forward(
1818
const fitting_config cfg, const device::fit_payload<fitter_t> payload) {
1919
device::fit_forward<fitter_t>(details::global_index1(), cfg, payload);
2020
}

0 commit comments

Comments
 (0)