clang-format

Rbiessy · Rbiessy · commit 1ae14e0f0e95 · 2025-05-22T10:43:45.000+01:00
diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
@@ -47,11 +47,11 @@ extern int g_ggml_sycl_prioritize_dmmv;
 
 #if defined(__clang__) && __has_builtin(__builtin_expect)
 // Hint the optimizer to pipeline the more likely following instruction in branches
-#define LIKELY(expr) __builtin_expect(expr, true)
-#define UNLIKELY(expr) __builtin_expect(expr, false)
+#    define LIKELY(expr)   __builtin_expect(expr, true)
+#    define UNLIKELY(expr) __builtin_expect(expr, false)
 #else
-#define LIKELY(expr) (expr)
-#define UNLIKELY(expr) (expr)
+#    define LIKELY(expr)   (expr)
+#    define UNLIKELY(expr) (expr)
 #endif
 
 #define GGML_SYCL_DEBUG(...)              \
@@ -540,23 +540,23 @@ inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * t
 }
 
 struct scope_op_debug_print {
-  scope_op_debug_print(const std::string& func, const ggml_tensor* dst, std::size_t num_src, const std::string& suffix = "") : func(func) {
-    if (LIKELY(!g_ggml_sycl_debug)) {
-        return;
-    }
-    GGML_SYCL_DEBUG("[SYCL][OP] call %s:", func.c_str());
-    debug_print_tensor(" dst", dst);
-    if (dst) {
-        for (std::size_t i = 0; i < num_src; ++i) {
-            debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
+    scope_op_debug_print(const std::string & func, const ggml_tensor * dst, std::size_t num_src,
+                         const std::string & suffix = "") :
+        func(func) {
+        if (LIKELY(!g_ggml_sycl_debug)) {
+            return;
         }
+        GGML_SYCL_DEBUG("[SYCL][OP] call %s:", func.c_str());
+        debug_print_tensor(" dst", dst);
+        if (dst) {
+            for (std::size_t i = 0; i < num_src; ++i) {
+                debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
+            }
+        }
+        GGML_SYCL_DEBUG("%s\n", suffix.c_str());
     }
-    GGML_SYCL_DEBUG("%s\n", suffix.c_str());
-  }
 
-  ~scope_op_debug_print() {
-    GGML_SYCL_DEBUG("[SYCL][OP] call %s done\n", func.c_str());
-  }
+    ~scope_op_debug_print() { GGML_SYCL_DEBUG("[SYCL][OP] call %s done\n", func.c_str()); }
 
   private:
     std::string func;
diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml/src/ggml-sycl/concat.cpp
@@ -159,40 +159,37 @@ static void concat_f32_sycl_non_cont(
 }
 
 void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-  scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
-  const ggml_tensor *src0 = dst->src[0];
-  const ggml_tensor *src1 = dst->src[1];
-  queue_ptr stream = ctx.stream();
-
-  const int32_t dim = ((int32_t *)dst->op_params)[0];
-
-  if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
-    const float *src0_d = (const float *)src0->data;
-    const float *src1_d = (const float *)src1->data;
-
-    float *dst_d = (float *)dst->data;
-
-    if (dim != 3) {
-      for (int i3 = 0; i3 < dst->ne[3]; i3++) {
-        concat_f32_sycl(
-            src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
-            dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
-            src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
-      }
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
+    const ggml_tensor *  src0   = dst->src[0];
+    const ggml_tensor *  src1   = dst->src[1];
+    queue_ptr            stream = ctx.stream();
+
+    const int32_t dim = ((int32_t *) dst->op_params)[0];
+
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
+        const float * src0_d = (const float *) src0->data;
+        const float * src1_d = (const float *) src1->data;
+
+        float * dst_d = (float *) dst->data;
+
+        if (dim != 3) {
+            for (int i3 = 0; i3 < dst->ne[3]; i3++) {
+                concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
+                                dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
+                                dst->ne[1], dst->ne[2], dim, stream);
+            }
+        } else {
+            const size_t size0 = ggml_nbytes(src0);
+            const size_t size1 = ggml_nbytes(src1);
+
+            SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
+            SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
+        }
     } else {
-      const size_t size0 = ggml_nbytes(src0);
-      const size_t size1 = ggml_nbytes(src1);
-
-      SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
-      SYCL_CHECK(CHECK_TRY_ERROR(
-          stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
+        concat_f32_sycl_non_cont(stream, (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
+                                 src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1],
+                                 src0->nb[2], src0->nb[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
+                                 src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
+                                 dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
     }
-  } else
-    concat_f32_sycl_non_cont(
-        stream, (const char *)src0->data, (const char *)src1->data,
-        (char *)dst->data, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
-        src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src1->ne[0],
-        src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1],
-        src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
-        dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
 }
diff --git a/ggml/src/ggml-sycl/cpy.cpp b/ggml/src/ggml-sycl/cpy.cpp
@@ -617,7 +617,8 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co
 
 void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
     // Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
-    scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, std::string(" src0 type=") + ggml_type_name(src0->type));
+    scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0,
+                                         std::string(" src0 type=") + ggml_type_name(src0->type));
     const int64_t ne = ggml_nelements(src0);
     GGML_ASSERT(ne == ggml_nelements(src1));
 
diff --git a/ggml/src/ggml-sycl/getrows.cpp b/ggml/src/ggml-sycl/getrows.cpp
@@ -257,7 +257,7 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
     GGML_UNUSED(ctx);
 }
 
-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
@@ -307,4 +307,3 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
             GGML_ABORT("fatal error");
     }
 }
-
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -503,7 +503,7 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
 static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
                                            uint8_t value) try {
     GGML_SYCL_DEBUG("[SYCL] call %s: size=%zu\n", __func__, buffer->size);
-    ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
+    ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
 
     ggml_sycl_set_device(ctx->device);
     queue_ptr stream = ctx->stream;
@@ -2036,12 +2036,12 @@ inline void ggml_sycl_op_mul_mat_sycl(
 #else
     bool use_fp16 = false;
 #endif
-    if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
-        use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] &&
-        dst->op_params[0] == GGML_PREC_DEFAULT) {
+    if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && use_fp16 && ggml_is_contiguous(src0) &&
+        row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
         ggml_sycl_pool_alloc<sycl::half> src0_as_f16(ctx.pool());
         if (src0->type != GGML_TYPE_F16) {
-            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp16_sycl", dst, /*num_src=*/2, " : converting src0 to fp16");
+            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp16_sycl", dst, /*num_src=*/2,
+                                                 " : converting src0 to fp16");
             const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type, dst);
             GGML_ASSERT(to_fp16_sycl != nullptr);
             size_t ne = row_diff*ne00;
@@ -2054,7 +2054,8 @@ inline void ggml_sycl_op_mul_mat_sycl(
 
         ggml_sycl_pool_alloc<sycl::half> src1_as_f16(ctx.pool());
         if (src1->type != GGML_TYPE_F16) {
-            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp16_sycl", dst, /*num_src=*/2, " : converting src1 to fp16");
+            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp16_sycl", dst, /*num_src=*/2,
+                                                 " : converting src1 to fp16");
             const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
             GGML_ASSERT(to_fp16_sycl != nullptr);
             size_t ne = src1_ncols*ne10;
@@ -2071,7 +2072,8 @@ inline void ggml_sycl_op_mul_mat_sycl(
             DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr,
                                       DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
                                       dst_f16.get(), DnnlGemmWrapper::to_dt<sycl::half>(), stream);
-            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2, " : converting dst to fp32");
+            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2,
+                                                 " : converting dst to fp32");
             const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
             to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff* src1_ncols, stream);
         }
@@ -2087,23 +2089,25 @@ inline void ggml_sycl_op_mul_mat_sycl(
                 src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
                 dst_f16.get(), dpct::library_data_t::real_half, ldc,
                 dpct::library_data_t::real_half)));
-            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2, " : converting dst to fp32");
+            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2,
+                                                 " : converting dst to fp32");
             const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
             to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
         }
-    }
-    else {
+    } else {
         ggml_sycl_pool_alloc<float> src0_ddq_as_f32(ctx.pool());
         ggml_sycl_pool_alloc<float> src1_ddq_as_f32(ctx.pool());
         if (src0->type != GGML_TYPE_F32) {
-            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2, " : converting src0 to fp32");
+            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2,
+                                                 " : converting src0 to fp32");
             const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src0->type, dst);
             GGML_ASSERT(to_fp32_sycl != nullptr);
             src0_ddq_as_f32.alloc(row_diff*ne00);
             to_fp32_sycl(src0_dd_i, src0_ddq_as_f32.get(), row_diff*ne00, stream);
         }
         if (src1->type != GGML_TYPE_F32) {
-            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2, " : converting src1 to fp32");
+            scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp32_sycl", dst, /*num_src=*/2,
+                                                 " : converting src1 to fp32");
             const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src1->type, dst);
             GGML_ASSERT(to_fp32_sycl != nullptr);
             src1_ddq_as_f32.alloc(src1_ncols*ne10);
@@ -2139,7 +2143,7 @@ catch (sycl::exception const &exc) {
   std::exit(1);
 }
 
-static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();
@@ -2191,7 +2195,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
     sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
 }
 
-inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();
@@ -2222,7 +2226,7 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor *
     argsort_f32_i32_sycl(src0_dd, (int *) dst_dd, ncols, nrows, order, main_stream);
 }
 
-inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_I32);
 
@@ -2237,7 +2241,7 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *ds
     argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
 }
 
-inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tensor *dst) {
+inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();
@@ -2254,7 +2258,7 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tens
     diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
 }
 
-inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
     dpct::queue_ptr main_stream = ctx.stream();
@@ -2441,7 +2445,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
             dev[i].src1_ddq = dev[i].src1_ddq_alloc.alloc(ctx.pool(i), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);
 
             if (src1_on_device && src1_is_contiguous) {
-                scope_op_debug_print scope_dbg_print(std::string(__func__) + "/quantize_row_q8_1_sycl", dst, /*num_src=*/2, " : converting src1 to Q8_1");
+                scope_op_debug_print scope_dbg_print(std::string(__func__) + "/quantize_row_q8_1_sycl", dst,
+                                                     /*num_src=*/2, " : converting src1 to Q8_1");
                 quantize_row_q8_1_sycl(dev[i].src1_ddf, dev[i].src1_ddq, ne10, nrows1, src1_padded_col_size, stream);
                 /*
                 DPCT1010:90: SYCL uses exceptions to report errors and does not
@@ -2546,7 +2551,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
                 }
 
                 if (convert_src1_to_q8_1 && !src1_is_contiguous) {
-                    scope_op_debug_print scope_dbg_print(std::string(__func__) + "/quantize_row_q8_1_sycl", dst, /*num_src=*/2, " : converting src1 to Q8_1");
+                    scope_op_debug_print scope_dbg_print(std::string(__func__) + "/quantize_row_q8_1_sycl", dst,
+                                                         /*num_src=*/2, " : converting src1 to Q8_1");
                     quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
                     /*
                     DPCT1010:92: SYCL uses exceptions to report errors and does
@@ -2790,7 +2796,8 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons
 
     // convert src1 to fp16
     if (src1->type != GGML_TYPE_F16) {
-        scope_op_debug_print scope_dbg_print(std::string(__func__) + "/to_fp16_nc_sycl", dst, /*num_src=*/2, " : converting src1 to fp16");
+        scope_op_debug_print    scope_dbg_print(std::string(__func__) + "/to_fp16_nc_sycl", dst, /*num_src=*/2,
+                                                " : converting src1 to fp16");
         const to_fp16_nc_sycl_t to_fp16_nc_sycl = get_to_fp16_nc_sycl(src1->type);
         GGML_ASSERT(to_fp16_nc_sycl != nullptr);
         const int64_t ne_src1 = ggml_nelements(src1);
@@ -3787,7 +3794,8 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
                                                const ggml_tensor *src,
                                                ggml_tensor *dst) try {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer);
+    bool is_cpy_supported                = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
+                            ggml_backend_buffer_is_sycl(src->buffer);
     GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
     debug_print_tensor(": dst=", dst);
     debug_print_tensor(" src=", src);
diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp
@@ -57,7 +57,7 @@ static void timestep_embedding_f32_sycl(
 
 void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
-    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *  src0   = dst->src[0];
     const float * src0_d = (const float *)src0->data;
     float * dst_d = (float *)dst->data;
     dpct::queue_ptr stream = ctx.stream();

Original file line number	Diff line number	Diff line change
`@@ -257,7 +257,7 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens`
`257`	`257`	`GGML_UNUSED(ctx);`
`258`	`258`	`}`
`259`	`259`
`260`		`-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {`
	`260`	`+void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {`
`261`	`261`	`GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);`
`262`	`262`	`GGML_ASSERT(dst->type == GGML_TYPE_F32);`
`263`	`263`
`@@ -307,4 +307,3 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {`
`307`	`307`	`GGML_ABORT("fatal error");`
`308`	`308`	`}`
`309`	`309`	`}`
`310`		`-`