@@ -895,43 +895,6 @@ static void clamp_f32(const float * x, float * dst, const float min, const float
895895 dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
896896}
897897
898- template <typename T>
899- static void im2col_kernel (const float *x, T *dst, int offset_delta,
900- int IW, int IH, int OW, int KW, int KH,
901- int pelements, int CHW, int s0, int s1, int p0,
902- int p1, int d0, int d1,
903- const sycl::nd_item<3 > &item_ct1) {
904- const int i = item_ct1.get_local_id (2 ) +
905- item_ct1.get_group (2 ) * item_ct1.get_local_range (2 );
906- if (i >= pelements) {
907- return ;
908- }
909-
910- const int ksize = OW * (KH > 1 ? KW : 1 );
911- const int kx = i / ksize;
912- const int kd = kx * ksize;
913- const int ky = (i - kd) / OW;
914- const int ix = i % OW;
915-
916- const int64_t iiw = ix * s0 + kx * d0 - p0;
917- const int64_t iih = item_ct1.get_group (1 ) * s1 + ky * d1 - p1;
918-
919- const int64_t offset_dst =
920- (item_ct1.get_group (1 ) * OW + ix) * CHW +
921- (item_ct1.get_group (0 ) * (KW * KH) + ky * KW + kx);
922-
923- if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
924- dst[offset_dst] =
925- sycl::vec<float , 1 >(0 .0f )
926- .convert <sycl::half, sycl::rounding_mode::automatic>()[0 ];
927- } else {
928- const int64_t offset_src = item_ct1.get_group (0 ) * offset_delta;
929- dst[offset_dst] =
930- sycl::vec<float , 1 >(x[offset_src + iih * IW + iiw])
931- .convert <sycl::half, sycl::rounding_mode::automatic>()[0 ];
932- }
933- }
934-
935898template <typename Ti, typename To>
936899static void pool2d_nchw_kernel (
937900 const int ih, const int iw, const int oh, const int ow,
@@ -1745,7 +1708,6 @@ static void diag_mask_inf_f32_sycl(const float *x, float *dst,
17451708}
17461709
17471710
1748-
17491711template <typename T>
17501712static void im2col_sycl (const float *x, T *dst, int IW, int IH,
17511713 int OW, int OH, int KW, int KH, int IC,
@@ -2478,47 +2440,6 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
24782440 (void ) src1_dd;
24792441}
24802442
2481- inline void ggml_sycl_op_im2col (ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
2482- const ggml_tensor *src1, ggml_tensor *dst,
2483- const float *src0_dd, const float *src1_dd,
2484- float *dst_dd,
2485- const queue_ptr &main_stream) {
2486-
2487- GGML_ASSERT (src0->type == GGML_TYPE_F16);
2488- GGML_ASSERT (src1->type == GGML_TYPE_F32);
2489- GGML_ASSERT ( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);
2490-
2491- const int32_t s0 = ((const int32_t *)(dst->op_params ))[0 ];
2492- const int32_t s1 = ((const int32_t *)(dst->op_params ))[1 ];
2493- const int32_t p0 = ((const int32_t *)(dst->op_params ))[2 ];
2494- const int32_t p1 = ((const int32_t *)(dst->op_params ))[3 ];
2495- const int32_t d0 = ((const int32_t *)(dst->op_params ))[4 ];
2496- const int32_t d1 = ((const int32_t *)(dst->op_params ))[5 ];
2497-
2498- const bool is_2D = ((const int32_t *)(dst->op_params ))[6 ] == 1 ;
2499-
2500- const int64_t IC = src1->ne [is_2D ? 2 : 1 ];
2501- const int64_t IH = is_2D ? src1->ne [1 ] : 1 ;
2502- const int64_t IW = src1->ne [0 ];
2503-
2504- const int64_t KH = is_2D ? src0->ne [1 ] : 1 ;
2505- const int64_t KW = src0->ne [0 ];
2506-
2507- const int64_t OH = is_2D ? dst->ne [2 ] : 1 ;
2508- const int64_t OW = dst->ne [1 ];
2509-
2510- const size_t delta_offset = src1->nb [is_2D ? 2 : 1 ] / 4 ; // nb is byte offset, src is type float32
2511-
2512- if (dst->type == GGML_TYPE_F16) {
2513- im2col_sycl (src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
2514- } else {
2515- im2col_sycl (src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
2516- }
2517-
2518- (void ) src0;
2519- (void ) src0_dd;
2520- }
2521-
25222443inline void ggml_sycl_op_sum_rows (ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
25232444 const ggml_tensor *src1, ggml_tensor *dst,
25242445 const float *src0_dd, const float *src1_dd,
0 commit comments