Skip to content

Commit 1e3a92b

Browse files
committed
CUDA/HIP: replace further casts with ggml_cuda_convert_val
1 parent 4738fa6 commit 1e3a92b

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

ggml/src/ggml-cuda/getrows.cu

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
 #include "getrows.cuh"
 #include "dequantize.cuh"
+#include "convert.cuh"

 template<int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
 static __global__ void k_get_rows(
@@ -34,8 +35,8 @@ static __global__ void k_get_rows(
     dfloat2 v;
     dequantize_kernel(src0_row, ib, iqs, v);

-    dst_row[iybs + iqs + 0]        = float(v.x);
-    dst_row[iybs + iqs + y_offset] = float(v.y);
+    dst_row[iybs + iqs + 0]        = ggml_cuda_convert_val<float, dst_t>(v.x);
+    dst_row[iybs + iqs + y_offset] = ggml_cuda_convert_val<float, dst_t>(v.y);
 }

 template<typename src0_t, typename dst_t>
@@ -62,7 +63,7 @@ static __global__ void k_get_rows_float(
     dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3;
     const src0_t * src0_row = (const src0_t *)((const char *) src0 + i01*nb01 + i11*nb02 + i12*nb03);

-    dst_row[i00] = float(src0_row[i00]);
+    dst_row[i00] = ggml_cuda_convert_val<src0_t, dst_t>(src0_row[i00]);
 }

 template<typename grad_t, typename dst_t>

ggml/src/ggml-cuda/mmvf.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
 #include "ggml.h"
 #include "common.cuh"
+#include "convert.cuh"
 #include "mmvf.cuh"

 template <typename T, typename type_acc, int ncols_dst, int block_size>
@@ -93,8 +94,8 @@ static __global__ void mul_mat_vec_f(
 #pragma unroll
             for (int j = 0; j < ncols_dst; ++j) {
                 const float2 tmpy = y2[j*stride_col_y2 + col2];
-                sumf[j] += float(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[0]) * tmpy.x;
-                sumf[j] += float(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[1]) * tmpy.y;
+                sumf[j] += ggml_cuda_convert_val<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[0]) * tmpy.x;
+                sumf[j] += ggml_cuda_convert_val<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[1]) * tmpy.y;
             }
         }
     } else {

0 commit comments

Comments (0)