diff --git a/device/cuda/src/finding/kernels/specializations/propagate_to_next_surface_src.cuh b/device/cuda/src/finding/kernels/specializations/propagate_to_next_surface_src.cuh index b139c6503e..6152a66c16 100644 --- a/device/cuda/src/finding/kernels/specializations/propagate_to_next_surface_src.cuh +++ b/device/cuda/src/finding/kernels/specializations/propagate_to_next_surface_src.cuh @@ -22,6 +22,10 @@ __global__ __launch_bounds__(128) void propagate_to_next_surface( const finding_config cfg, device::propagate_to_next_surface_payload payload) { +#if defined(__CUDA_ARCH__) && CUDART_VERSION >= 13000 + asm(".pragma \"enable_smem_spilling\";"); +#endif + device::propagate_to_next_surface( details::global_index1(), cfg, payload); } diff --git a/device/cuda/src/fitting/kernels/specializations/fit_backward_src.cuh b/device/cuda/src/fitting/kernels/specializations/fit_backward_src.cuh index 28286cfa30..5c077e9ba4 100644 --- a/device/cuda/src/fitting/kernels/specializations/fit_backward_src.cuh +++ b/device/cuda/src/fitting/kernels/specializations/fit_backward_src.cuh @@ -16,6 +16,11 @@ namespace kernels { template __global__ __launch_bounds__(128) void fit_backward( const fitting_config cfg, const device::fit_payload payload) { + +#if defined(__CUDA_ARCH__) && CUDART_VERSION >= 13000 + asm(".pragma \"enable_smem_spilling\";"); +#endif + device::fit_backward(details::global_index1(), cfg, payload); } } // namespace kernels diff --git a/device/cuda/src/fitting/kernels/specializations/fit_forward_src.cuh b/device/cuda/src/fitting/kernels/specializations/fit_forward_src.cuh index 2727c331b6..b114cba0b3 100644 --- a/device/cuda/src/fitting/kernels/specializations/fit_forward_src.cuh +++ b/device/cuda/src/fitting/kernels/specializations/fit_forward_src.cuh @@ -16,6 +16,11 @@ namespace kernels { template __global__ __launch_bounds__(128) void fit_forward( const fitting_config cfg, const device::fit_payload payload) { + +#if defined(__CUDA_ARCH__) && CUDART_VERSION >= 13000 + asm(".pragma \"enable_smem_spilling\";"); +#endif + device::fit_forward(details::global_index1(), cfg, payload); } } // namespace kernels