Skip to content

Commit 1eba300

Browse files
authored
Fixes CUDA 13 Linux wheel build jobs (#4068)
1 parent 10a5002 commit 1eba300

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

src/libtorchaudio/cuctc/src/ctc_prefix_decoder_kernel_v2.cu

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
#include <float.h>
2727
#include <algorithm>
28+
#include <limits>
2829
#include "../include/ctc_prefix_decoder_host.h"
2930
#include "ctc_fast_divmod.cuh"
3031
#include "cub/cub.cuh"
@@ -440,7 +441,11 @@ __launch_bounds__(BLOCK_SIZE) void topk_reduce_and_copy_list_per_batch_kernel(
440441
topk_values,
441442
beam,
442443
items_per_batch,
444+
#if CUDART_VERSION >= 12090 // CUDA 12.9 and later
445+
std::numeric_limits<float>::lowest(),
446+
#else
443447
cub::FpLimits<float>::Lowest(),
448+
#endif
444449
block_topk_fun,
445450
set_key_value);
446451

src/libtorchaudio/forced_align/gpu/compute.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,11 @@ __global__ void falign_cuda_step_kernel(
9494
alphas_a[curIdxOffset][i] = result + logProbs_a[batchIndex][t][labelIdx];
9595
threadMax = max(threadMax, alphas_a[curIdxOffset][i]);
9696
}
97+
#if CUDART_VERSION >= 12090 // CUDA 12.9 and later
98+
scalar_t maxResult = BlockReduce(tempStorage).Reduce(threadMax, thrust::maximum<scalar_t>());
99+
#else
97100
scalar_t maxResult = BlockReduce(tempStorage).Reduce(threadMax, cub::Max());
101+
#endif
98102
if (threadIdx.x == 0) {
99103
maxValue = maxResult;
100104
}

0 commit comments

Comments
 (0)