Skip to content

Commit 173df5e

Browse files
convert_val -> cast
1 parent e9ff641 commit 173df5e

File tree

5 files changed

+16
-20
lines changed

5 files changed

+16
-20
lines changed

ggml/src/ggml-cuda/convert.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __
3131
dequantize_kernel(vx, ib, iqs, v);
3232

3333
const int64_t iy0 = ((i03*ne02 + i02)*ne01 + i01)*ne00 + iybs + iqs;
34-
y[iy0 + 0] = ggml_cuda_convert_val<float, dst_t>(v.x);
35-
y[iy0 + y_offset] = ggml_cuda_convert_val<float, dst_t>(v.y);
34+
y[iy0 + 0] = ggml_cuda_cast<float, dst_t>(v.x);
35+
y[iy0 + y_offset] = ggml_cuda_cast<float, dst_t>(v.y);
3636
}
3737

3838
template <bool need_check>
@@ -630,7 +630,7 @@ static __global__ void convert_unary(
630630

631631
const int64_t ix = i03*s03 + i02*s02 + i01*s01 + i00;
632632
const int64_t iy = ((i03*ne02 + i02)*ne01 + i01)*ne00 + i00;
633-
y[iy] = ggml_cuda_convert_val<src_t, dst_t>(x[ix]);
633+
y[iy] = ggml_cuda_cast<src_t, dst_t>(x[ix]);
634634
}
635635

636636
template <typename src_t, typename dst_t>

ggml/src/ggml-cuda/convert.cuh

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ to_fp16_nc_cuda_t ggml_get_to_fp16_nc_cuda(ggml_type type);
3131
to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type);
3232

3333
template<typename src_t, typename dest_t>
34-
__host__ __device__ inline dest_t ggml_cuda_convert_val(src_t x) {
34+
__host__ __device__ inline dest_t ggml_cuda_cast(src_t x) {
3535
if constexpr (std::is_same_v<src_t, dest_t>) {
3636
return x;
3737
} else {
@@ -40,31 +40,31 @@ template<typename src_t, typename dest_t>
4040
}
4141

4242
template<>
43-
__host__ __device__ inline float ggml_cuda_convert_val<nv_bfloat16, float>(nv_bfloat16 x) {
43+
__host__ __device__ inline float ggml_cuda_cast<nv_bfloat16, float>(nv_bfloat16 x) {
4444
return __bfloat162float(x);
4545
}
4646

4747
template<>
48-
__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<float, nv_bfloat16>(float x) {
48+
__host__ __device__ inline nv_bfloat16 ggml_cuda_cast<float, nv_bfloat16>(float x) {
4949
return __float2bfloat16(x);
5050
}
5151

5252
template<>
53-
__host__ __device__ inline half ggml_cuda_convert_val<nv_bfloat16, half>(nv_bfloat16 x) {
53+
__host__ __device__ inline half ggml_cuda_cast<nv_bfloat16, half>(nv_bfloat16 x) {
5454
return half(__bfloat162float(x));
5555
}
5656

5757
template<>
58-
__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<half, nv_bfloat16>(half x) {
58+
__host__ __device__ inline nv_bfloat16 ggml_cuda_cast<half, nv_bfloat16>(half x) {
5959
return __float2bfloat16(float(x));
6060
}
6161

6262
template<>
63-
__host__ __device__ inline int ggml_cuda_convert_val<nv_bfloat16, int>(nv_bfloat16 x) {
63+
__host__ __device__ inline int ggml_cuda_cast<nv_bfloat16, int>(nv_bfloat16 x) {
6464
return int(__bfloat162float(x));
6565
}
6666

6767
template<>
68-
__host__ __device__ inline nv_bfloat16 ggml_cuda_convert_val<int, nv_bfloat16>(int x) {
68+
__host__ __device__ inline nv_bfloat16 ggml_cuda_cast<int, nv_bfloat16>(int x) {
6969
return __float2bfloat16(float(x));
7070
}

ggml/src/ggml-cuda/cpy-utils.cuh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@
55

66
template<typename src_t, typename dst_t>
77
static __device__ __forceinline__ void convert_flt(const src_t * src, dst_t * dst) {
8-
if constexpr (std::is_same_v<src_t, dst_t>) {
9-
*dst = *src;
10-
} else {
11-
*dst = ggml_cuda_convert_val<src_t, dst_t>(*src);
12-
}
8+
*dst = ggml_cuda_cast<src_t, dst_t>(*src);
139
}
1410

1511
static __device__ __forceinline__ int best_index_int8(int n, const int8_t * val, float x) {

ggml/src/ggml-cuda/getrows.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ static __global__ void k_get_rows(
3535
dfloat2 v;
3636
dequantize_kernel(src0_row, ib, iqs, v);
3737

38-
dst_row[iybs + iqs + 0] = ggml_cuda_convert_val<float, dst_t>(v.x);
39-
dst_row[iybs + iqs + y_offset] = ggml_cuda_convert_val<float, dst_t>(v.y);
38+
dst_row[iybs + iqs + 0] = ggml_cuda_cast<float, dst_t>(v.x);
39+
dst_row[iybs + iqs + y_offset] = ggml_cuda_cast<float, dst_t>(v.y);
4040
}
4141

4242
template<typename src0_t, typename dst_t>
@@ -63,7 +63,7 @@ static __global__ void k_get_rows_float(
6363
dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3;
6464
const src0_t * src0_row = (const src0_t *)((const char *) src0 + i01*nb01 + i11*nb02 + i12*nb03);
6565

66-
dst_row[i00] = ggml_cuda_convert_val<src0_t, dst_t>(src0_row[i00]);
66+
dst_row[i00] = ggml_cuda_cast<src0_t, dst_t>(src0_row[i00]);
6767
}
6868

6969
template<typename grad_t, typename dst_t>

ggml/src/ggml-cuda/mmvf.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ static __global__ void mul_mat_vec_f(
9494
#pragma unroll
9595
for (int j = 0; j < ncols_dst; ++j) {
9696
const float2 tmpy = y2[j*stride_col_y2 + col2];
97-
sumf[j] += ggml_cuda_convert_val<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[0]) * tmpy.x;
98-
sumf[j] += ggml_cuda_convert_val<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[1]) * tmpy.y;
97+
sumf[j] += ggml_cuda_cast<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[0]) * tmpy.x;
98+
sumf[j] += ggml_cuda_cast<nv_bfloat16, float>(reinterpret_cast<const nv_bfloat16 *>(&tmpx)[1]) * tmpy.y;
9999
}
100100
}
101101
} else {

0 commit comments

Comments (0)