We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d76541a, commit 300a59c (Copy full SHA for 300a59c)
csrc/cache_kernels.cu
@@ -16,7 +16,7 @@
16
17
#include <algorithm>
18
#include <cassert>
19
-#include <cfloat> // FLT_MIN
+#include <cfloat>
20
21
#ifdef USE_ROCM
22
#include <hip/hip_bf16.h>
@@ -479,6 +479,7 @@ __global__ void concat_and_cache_ds_mla_kernel(
479
480
// Compute the scale for the tile
481
float tile_scale = max_abs / 448.f;
482
+ tile_scale = fmaxf(tile_scale, FLT_MIN);
483
484
// The first lane of each half-warp writes the scale to kv_cache
485
if ((lane_idx == 0) || (lane_idx == 16)) {
0 commit comments