CUDA/HIP: specialize ggml_cuda_convert_val for half <-> bf16 conversions

IMbackK · IMbackK · commit 4738fa6c7226 · 2025-08-13T13:05:23.000+02:00
diff --git a/ggml/src/ggml-cuda/convert.cuh b/ggml/src/ggml-cuda/convert.cuh
@@ -34,23 +34,39 @@ to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type);
 template<typename src_t, typename dest_t>
  __host__ __device__ inline dest_t ggml_cuda_convert_val(src_t x)
 {
-    return float(x);
+    if constexpr (std::is_same_v<src_t, dest_t>) {
+        return x;
+    } else {
+        return float(x);
+    }
 }
 
 template<>
 __host__ __device__ inline float ggml_cuda_convert_val<nv_bfloat16, float>(nv_bfloat16 x)
 {
-    return  __bfloat162float(x);
+    return __bfloat162float(x);
 }
 
 template<>
 __host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<nv_bfloat16, nv_bfloat16>(nv_bfloat16 x)
 {
-    return  x;
+    return x;
 }
 
 template<>
 __host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<float, nv_bfloat16>(float x)
 {
     return __float2bfloat16(x);
 }
+
+template<>
+__host__ __device__ inline half ggml_cuda_convert_val<nv_bfloat16, half>(nv_bfloat16 x)
+{
+    return half(__bfloat162float(x));
+}
+
+template<>
+__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<half, nv_bfloat16>(half x)
+{
+    return __float2bfloat16(float(x));
+}

Original file line number	Diff line number	Diff line change
`@@ -34,23 +34,39 @@ to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type);`
`34`	`34`	`template<typename src_t, typename dest_t>`
`35`	`35`	`__host__ __device__ inline dest_t ggml_cuda_convert_val(src_t x)`
`36`	`36`	`{`
`37`		`- return float(x);`
	`37`	`+ if constexpr (std::is_same_v<src_t, dest_t>) {`
	`38`	`+ return x;`
	`39`	`+ } else {`
	`40`	`+ return float(x);`
	`41`	`+ }`
`38`	`42`	`}`
`39`	`43`
`40`	`44`	`template<>`
`41`	`45`	`__host__ __device__ inline float ggml_cuda_convert_val<nv_bfloat16, float>(nv_bfloat16 x)`
`42`	`46`	`{`
`43`		`- return __bfloat162float(x);`
	`47`	`+ return __bfloat162float(x);`
`44`	`48`	`}`
`45`	`49`
`46`	`50`	`template<>`
`47`	`51`	`__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<nv_bfloat16, nv_bfloat16>(nv_bfloat16 x)`
`48`	`52`	`{`
`49`		`- return x;`
	`53`	`+ return x;`
`50`	`54`	`}`
`51`	`55`
`52`	`56`	`template<>`
`53`	`57`	`__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<float, nv_bfloat16>(float x)`
`54`	`58`	`{`
`55`	`59`	`return __float2bfloat16(x);`
`56`	`60`	`}`
	`61`	`+`
	`62`	`+template<>`
	`63`	`+__host__ __device__ inline half ggml_cuda_convert_val<nv_bfloat16, half>(nv_bfloat16 x)`
	`64`	`+{`
	`65`	`+ return half(__bfloat162float(x));`
	`66`	`+}`
	`67`	`+`
	`68`	`+template<>`
	`69`	`+__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<half, nv_bfloat16>(half x)`
	`70`	`+{`
	`71`	`+ return __float2bfloat16(float(x));`
	`72`	`+}`