Skip to content

Commit eafb220

Browse files
committed
test 2 with reviewed comments
1 parent 7393cdb commit eafb220

File tree

1 file changed

+2 / -10 lines changed

1 file changed

+2
-10
lines changed

ggml/src/ggml-cuda/conv2d.cu

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include "conv2d.cuh"
2+
#include "convert.cuh"
23

34
struct conv_params {
45
const int64_t IW, IH;
@@ -17,15 +18,6 @@ struct kernel_bounds {
1718
int64_t x_min, x_max;
1819
};
1920

20-
template<typename T>
21-
__device__ __forceinline__ float to_float(const T& val) {
22-
if constexpr (std::is_same_v<T, __half>) {
23-
return __half2float(val);
24-
} else {
25-
return val; // Assumes T is float
26-
}
27-
}
28-
2921
__device__ __forceinline__ int64_t max64(int64_t a, int64_t b) {
3022
return (a > b) ? a : b;
3123
}
@@ -104,7 +96,7 @@ static __global__ void conv2d_kernel(const float * __restrict__ input,
10496

10597
const float input_val = input[Layout::input_index(n, c_in, in_y, in_x, P)];
10698
const T kernel_val = kernel[Layout::kernel_index(c_out, c_in, ky, kx, P)];
107-
acc += (input_val * to_float(kernel_val));
99+
acc += (input_val * ggml_cuda_cast<float, T>(kernel_val));
108100
}
109101
}
110102
}

0 commit comments

Comments (0)