Skip to content

Commit 173df5e

Browse files
convert_val -> cast
1 parent e9ff641 commit 173df5e

File tree

5 files changed

+16
-20
lines changed

5 files changed

+16
-20
lines changed

ggml/src/ggml-cuda/convert.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __
3131
dequantize_kernel(vx, ib, iqs, v);
3232

3333
const int64_t iy0 = ((i03*ne02 + i02)*ne01 + i01)*ne00 + iybs + iqs;
34-
y[iy0 + 0] = ggml_cuda_convert_val<float, dst_t>(v.x);
35-
y[iy0 + y_offset] = ggml_cuda_convert_val<float, dst_t>(v.y);
34+
y[iy0 + 0] = ggml_cuda_cast<float, dst_t>(v.x);
35+
y[iy0 + y_offset] = ggml_cuda_cast<float, dst_t>(v.y);
3636
}
3737

3838
template <bool need_check>
@@ -630,7 +630,7 @@ static __global__ void convert_unary(
630630

631631
const int64_t ix = i03*s03 + i02*s02 + i01*s01 + i00;
632632
const int64_t iy = ((i03*ne02 + i02)*ne01 + i01)*ne00 + i00;
633-
y[iy] = ggml_cuda_convert_val<src_t, dst_t>(x[ix]);
633+
y[iy] = ggml_cuda_cast<src_t, dst_t>(x[ix]);
634634
}
635635

636636
template <typename src_t, typename dst_t>

ggml/src/ggml-cuda/convert.cuh

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ to_fp16_nc_cuda_t ggml_get_to_fp16_nc_cuda(ggml_type type);
3131
to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type);
3232

3333
template<typename src_t, typename dest_t>
34-
__host__ __device__ inline dest_t ggml_cuda_convert_val(src_t x) {
34+
__host__ __device__ inline dest_t ggml_cuda_cast(src_t x) {
3535
if constexpr (std::is_same_v<src_t, dest_t>) {
3636
return x;
3737
} else {
@@ -40,31 +40,31 @@ template<typename src_t, typename dest_t>
4040
}
4141

4242
template<>
43-
__host__ __device__ inline float ggml_cuda_convert_val<nv_bfloat16, float>(nv_bfloat16 x) {
43+
__host__ __device__ inline float ggml_cuda_cast<nv_bfloat16, float>(nv_bfloat16 x) {
4444
return __bfloat162float(x);
4545
}
4646

4747
template<>
48-
__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<float, nv_bfloat16>(float x) {
48+
__host__ __device__ inline nv_bfloat16 ggml_cuda_cast<float, nv_bfloat16>(float x) {
4949
return __float2bfloat16(x);
5050
}
5151

5252
template<>
53-
__host__ __device__ inline half ggml_cuda_convert_val<nv_bfloat16, half>(nv_bfloat16 x) {
53+
__host__ __device__ inline half ggml_cuda_cast<nv_bfloat16, half>(nv_bfloat16 x) {
5454
return half(__bfloat162float(x));
5555
}
5656

5757
template<>
58-
__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<half, nv_bfloat16>(half x) {
58+
__host__ __device__ inline nv_bfloat16 ggml_cuda_cast<half, nv_bfloat16>(half x) {
5959
return __float2bfloat16(float(x));
6060
}
6161

6262
template<>
63-
__host__ __device__ inline int ggml_cuda_convert_val<nv_bfloat16, int>(nv_bfloat16 x) {
63+
__host__ __device__ inline int ggml_cuda_cast<nv_bfloat16, int>(nv_bfloat16 x) {
6464
return int(__bfloat162float(x));
6565
}
6666

6767
template<>
68-
__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<int, nv_bfloat16>(int x) {
68+
__host__ __device__ inline nv_bfloat16 ggml_cuda_cast<int, nv_bfloat16>(int x) {
6969
return __float2bfloat16(float(x));
7070
}

ggml/src/ggml-cuda/cpy-utils.cuh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@
55

66
template<typename src_t, typename dst_t>
77
static __device__ __forceinline__ void convert_flt(const src_t * src, dst_t * dst) {
8-
if constexpr (std::is_same_v<src_t, dst_t>) {
9-
*dst = *src;
10-
} else {
11-
*dst = ggml_cuda_convert_val<src_t, dst_t>(*src);
12-
}
8+
*dst = ggml_cuda_cast<src_t, dst_t>(*src);
139
}
1410

1511
static __device__ __forceinline__ int best_index_int8(int n, const int8_t * val, float x) {

ggml/src/ggml-cuda/getrows.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ static __global__ void k_get_rows(
3535
dfloat2 v;
3636
dequantize_kernel(src0_row, ib, iqs, v);
3737

38-
dst_row[iybs + iqs + 0] = ggml_cuda_convert_val<float, dst_t>(v.x);
39-
dst_row[iybs + iqs + y_offset] = ggml_cuda_convert_val<float, dst_t>(v.y);
38+
dst_row[iybs + iqs + 0] = ggml_cuda_cast<float, dst_t>(v.x);
39+
dst_row[iybs + iqs + y_offset] = ggml_cuda_cast<float, dst_t>(v.y);
4040
}
4141

4242
template<typename src0_t, typename dst_t>
@@ -63,7 +63,7 @@ static __global__ void k_get_rows_float(
6363
dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3;
6464
const src0_t * src0_row = (const src0_t *)((const char *) src0 + i01*nb01 + i11*nb02 + i12*nb03);
6565

66-
dst_row[i00] = ggml_cuda_convert_val<src0_t, dst_t>(src0_row[i00]);
66+
dst_row[i00] = ggml_cuda_cast<src0_t, dst_t>(src0_row[i00]);
6767
}
6868

6969
template<typename grad_t, typename dst_t>

ggml/src/ggml-cuda/mmvf.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ static __global__ void mul_mat_vec_f(
9494
#pragma unroll
9595
for (int j = 0; j < ncols_dst; ++j) {
9696
const float2 tmpy = y2[j*stride_col_y2 + col2];
97-
sumf[j] += ggml_cuda_convert_val<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[0]) * tmpy.x;
98-
sumf[j] += ggml_cuda_convert_val<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[1]) * tmpy.y;
97+
sumf[j] += ggml_cuda_cast<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[0]) * tmpy.x;
98+
sumf[j] += ggml_cuda_cast<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[1]) * tmpy.y;
9999
}
100100
}
101101
} else {

0 commit comments

Comments (0)