arthw
diff --git a/‎ggml/src/ggml-sycl.cpp‎
Lines changed: 0 additions & 79 deletions b/‎ggml/src/ggml-sycl.cpp‎
Lines changed: 0 additions & 79 deletions
diff --git a/‎ggml/src/ggml-sycl/backend.hpp‎
Lines changed: 1 addition & 0 deletions b/‎ggml/src/ggml-sycl/backend.hpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎ggml/src/ggml-sycl/common.cpp‎
Lines changed: 10 additions & 0 deletions b/‎ggml/src/ggml-sycl/common.cpp‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎ggml/src/ggml-sycl/common.hpp‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-sycl/common.hpp‎
Lines changed: 2 additions & 0 deletions
@@ -895,43 +895,6 @@ static void clamp_f32(const float * x, float * dst, const float min, const float
     dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
 }
 
-template <typename T>
-static void im2col_kernel(const float *x, T *dst, int offset_delta,
-                           int IW, int IH, int OW, int KW, int KH,
-                           int pelements, int CHW, int s0, int s1, int p0,
-                           int p1, int d0, int d1,
-                           const sycl::nd_item<3> &item_ct1) {
-    const int i = item_ct1.get_local_id(2) +
-                  item_ct1.get_group(2) * item_ct1.get_local_range(2);
-    if (i >= pelements) {
-        return;
-    }
-
-    const int ksize = OW * (KH > 1 ? KW : 1);
-    const int kx = i / ksize;
-    const int kd = kx * ksize;
-    const int ky = (i - kd) / OW;
-    const int ix = i % OW;
-
-    const int64_t iiw = ix * s0 + kx * d0 - p0;
-    const int64_t iih = item_ct1.get_group(1) * s1 + ky * d1 - p1;
-
-    const int64_t offset_dst =
-        (item_ct1.get_group(1) * OW + ix) * CHW +
-        (item_ct1.get_group(0) * (KW * KH) + ky * KW + kx);
-
-    if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
-        dst[offset_dst] =
-            sycl::vec<float, 1>(0.0f)
-                .convert<sycl::half, sycl::rounding_mode::automatic>()[0];
-    } else {
-        const int64_t offset_src = item_ct1.get_group(0) * offset_delta;
-        dst[offset_dst] =
-            sycl::vec<float, 1>(x[offset_src + iih * IW + iiw])
-                .convert<sycl::half, sycl::rounding_mode::automatic>()[0];
-    }
-}
-
 template <typename Ti, typename To>
 static  void pool2d_nchw_kernel(
         const int ih, const int iw, const int oh, const int ow,
@@ -1745,7 +1708,6 @@ static void diag_mask_inf_f32_sycl(const float *x, float *dst,
 }
 
 
-
 template <typename T>
 static void im2col_sycl(const float *x, T *dst, int IW, int IH,
                                 int OW, int OH, int KW, int KH, int IC,
@@ -2478,47 +2440,6 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
     (void) src1_dd;
 }
 
-inline void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                const ggml_tensor *src1, ggml_tensor *dst,
-                                const float *src0_dd, const float *src1_dd,
-                                float *dst_dd,
-                                const queue_ptr &main_stream) {
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-    const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
-    const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
-    const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
-    const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
-    const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
-
-    const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1;
-
-    const int64_t IC = src1->ne[is_2D ? 2 : 1];
-    const int64_t IH = is_2D ? src1->ne[1] : 1;
-    const int64_t IW =         src1->ne[0];
-
-    const int64_t KH = is_2D ? src0->ne[1] : 1;
-    const int64_t KW =         src0->ne[0];
-
-    const int64_t OH = is_2D ? dst->ne[2] : 1;
-    const int64_t OW =         dst->ne[1];
-
-    const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
-
-    if (dst->type == GGML_TYPE_F16) {
-        im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
-    } else {
-        im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
-    }
-
-    (void) src0;
-    (void) src0_dd;
-}
-
 inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                   const ggml_tensor *src1, ggml_tensor *dst,
                                   const float *src0_dd, const float *src1_dd,
 
@@ -25,5 +25,6 @@
 #include "norm.hpp"
 #include "softmax.hpp"
 #include "tsembd.hpp"
+#include "im2col.hpp"
 
 #endif // GGML_SYCL_BACKEND_HPP
@@ -198,3 +198,13 @@ ggml_sycl_device_info &ggml_sycl_info(int main_gpu_id) {
 }
 
 //--ggml_sycl_device_info--
+int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
+  const int64_t max_range = std::numeric_limits<int>::max();
+  int64_t sycl_down_blk_size = block_size;
+  int64_t global_range = accumulate_block_num * sycl_down_blk_size;
+  while(global_range > max_range) {
+      sycl_down_blk_size /= 2;
+      global_range = accumulate_block_num * sycl_down_blk_size;
+  }
+  return sycl_down_blk_size;
+}
@@ -344,4 +344,6 @@ static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {
     return acc.template get_multi_ptr<sycl::access::decorated::no>().get();
 }
 
+int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size);
+
 #endif // GGML_SYCL_COMMON_HPP
Original file line number	Diff line number	Diff line change
`@@ -344,4 +344,6 @@ static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {`
`344`	`344`	`return acc.template get_multi_ptr<sycl::access::decorated::no>().get();`
`345`	`345`	`}`
`346`	`346`
	`347`	`+int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size);`
	`348`	`+`
`347`	`349`	`#endif // GGML_SYCL_COMMON_HPP`