Add more prints

Rbiessy · Rbiessy · commit cb7d9cba9fe5 · 2025-05-22T10:43:45.000+01:00
diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
@@ -506,7 +506,7 @@ void debug_print_array(const std::string& prefix, const T array[N]) {
     GGML_SYCL_DEBUG("%s", ss.str().c_str());
 }
 
-inline void debug_print_tensor(const std::string& prefix, const ggml_tensor* tensor) {
+inline void debug_print_tensor(const std::string& prefix, const ggml_tensor* tensor, const std::string& suffix = "") {
     GGML_SYCL_DEBUG("%s=", prefix.c_str());
     if (tensor) {
         GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
@@ -521,14 +521,15 @@ inline void debug_print_tensor(const std::string& prefix, const ggml_tensor* ten
     } else {
         GGML_SYCL_DEBUG("nullptr");
     }
+    GGML_SYCL_DEBUG("%s", suffix.c_str());
 }
 
 struct scope_op_debug_print {
   scope_op_debug_print(const std::string& func, const ggml_tensor* dst, std::size_t num_src, const std::string& suffix = "") : func(func) {
     if (!g_ggml_sycl_debug) {
         return;
     }
-    GGML_SYCL_DEBUG("call %s:", func.c_str());
+    GGML_SYCL_DEBUG("[SYCL][OP] call %s:", func.c_str());
     debug_print_tensor(" dst", dst);
     if (dst) {
         for (std::size_t i = 0; i < num_src; ++i) {
@@ -539,7 +540,7 @@ struct scope_op_debug_print {
   }
 
   ~scope_op_debug_print() {
-    GGML_SYCL_DEBUG("call %s done\n", func.c_str());
+    GGML_SYCL_DEBUG("[SYCL][OP] call %s done\n", func.c_str());
   }
 
   private:
diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp
@@ -1092,7 +1092,8 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
         src0->type == GGML_TYPE_Q8_0 || src0->type == GGML_TYPE_F16;
 
     if (src1_convert_f16) {
-        scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2, " : converting src1 to fp16");
+        scope_op_debug_print scope_dbg_print(std::string(__func__) + "to_fp16_sycl", dst, /*num_src=*/2,
+                                             " : converting src1 to fp16");
         src1_dfloat = src1_dfloat_a.alloc(ne00);
         const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
         GGML_ASSERT(to_fp16_sycl != nullptr);
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -346,6 +346,8 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) {
 static enum ggml_status
 ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                      ggml_tensor *tensor) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor, "\n");
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
@@ -381,7 +383,9 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                                 ggml_tensor *tensor,
                                                 const void *data, size_t offset,
                                                 size_t size) try {
-
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
     ggml_sycl_set_device(ctx->device);
     auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
@@ -406,7 +410,9 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                                 const ggml_tensor *tensor,
                                                 void *data, size_t offset,
                                                 size_t size) try {
-
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
 
     ggml_sycl_set_device(ctx->device);
@@ -434,7 +440,12 @@ static bool
 ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
                                     const ggml_tensor *src,
                                     ggml_tensor *dst) try {
-    if (ggml_backend_buffer_is_sycl(src->buffer)) {
+    bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": dst=", dst);
+    debug_print_tensor(" src=", src);
+    GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
+    if (is_cpy_supported) {
         ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
         ggml_backend_sycl_buffer_context * dst_ctx = (ggml_backend_sycl_buffer_context *)dst->buffer->context;
 
@@ -491,7 +502,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
 
 static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
                                            uint8_t value) try {
-     ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
+    GGML_SYCL_DEBUG("[SYCL] call %s: size=%zu\n", __func__, buffer->size);
+    ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
 
     ggml_sycl_set_device(ctx->device);
     queue_ptr stream = ctx->stream;
@@ -510,7 +522,9 @@ catch (sycl::exception const &exc) {
 
 static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
                                                    size_t offset, size_t size) {
-    GGML_SYCL_DEBUG(" [SYCL] call %s\n", __func__);
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
     SYCL_CHECK(ggml_sycl_set_device(ctx->device));
     auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
@@ -788,6 +802,8 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff
 static enum ggml_status
 ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                            ggml_tensor *tensor) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor, "\n");
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
 
     ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
@@ -872,6 +888,9 @@ static void
 ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                           ggml_tensor *tensor, const void *data,
                                           size_t offset, size_t size) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     // split tensors must always be set in their entirety at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));
@@ -925,6 +944,9 @@ static void
 ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                           const ggml_tensor *tensor, void *data,
                                           size_t offset, size_t size) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     // split tensors must always be set in their entirety at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));
@@ -3723,6 +3745,9 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
                                                ggml_tensor *tensor,
                                                const void *data, size_t offset,
                                                size_t size) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
@@ -3741,6 +3766,9 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
                                                const ggml_tensor *tensor,
                                                void *data, size_t offset,
                                                size_t size) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": tensor=", tensor);
+    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
@@ -3759,7 +3787,12 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
                                                const ggml_tensor *src,
                                                ggml_tensor *dst) try {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer)) {
+    bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer);
+    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+    debug_print_tensor(": dst=", dst);
+    debug_print_tensor(" src=", src);
+    GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
+    if (is_cpy_supported) {
         /*
         DPCT1009:215: SYCL uses exceptions to report errors and does not use the
         error codes. The original code was commented out and a warning string
@@ -3780,6 +3813,7 @@ catch (sycl::exception const &exc) {
 }
 
 static void ggml_backend_sycl_synchronize(ggml_backend_t backend) try {
+    GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
     SYCL_CHECK(CHECK_TRY_ERROR((stream)->wait()));
@@ -3881,7 +3915,7 @@ catch (sycl::exception const &exc)
 }
 
 static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
-
+    GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
     sycl::event* sycl_event = static_cast<sycl::event*>(event->context);
 
     if (ggml_backend_is_sycl(backend)) {
@@ -4276,6 +4310,7 @@ static void ggml_backend_sycl_device_event_free(ggml_backend_dev_t dev, ggml_bac
 
 static void ggml_backend_sycl_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) try {
   GGML_UNUSED(dev);
+  GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
 
   sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
   SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));

Original file line number	Diff line number	Diff line change
`@@ -506,7 +506,7 @@ void debug_print_array(const std::string& prefix, const T array[N]) {`
`506`	`506`	`GGML_SYCL_DEBUG("%s", ss.str().c_str());`
`507`	`507`	`}`
`508`	`508`
`509`		`-inline void debug_print_tensor(const std::string& prefix, const ggml_tensor* tensor) {`
	`509`	`+inline void debug_print_tensor(const std::string& prefix, const ggml_tensor* tensor, const std::string& suffix = "") {`
`510`	`510`	`GGML_SYCL_DEBUG("%s=", prefix.c_str());`
`511`	`511`	`if (tensor) {`
`512`	`512`	`GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));`
`@@ -521,14 +521,15 @@ inline void debug_print_tensor(const std::string& prefix, const ggml_tensor* ten`
`521`	`521`	`} else {`
`522`	`522`	`GGML_SYCL_DEBUG("nullptr");`
`523`	`523`	`}`
	`524`	`+ GGML_SYCL_DEBUG("%s", suffix.c_str());`
`524`	`525`	`}`
`525`	`526`
`526`	`527`	`struct scope_op_debug_print {`
`527`	`528`	`scope_op_debug_print(const std::string& func, const ggml_tensor* dst, std::size_t num_src, const std::string& suffix = "") : func(func) {`
`528`	`529`	`if (!g_ggml_sycl_debug) {`
`529`	`530`	`return;`
`530`	`531`	`}`
`531`		`- GGML_SYCL_DEBUG("call %s:", func.c_str());`
	`532`	`+ GGML_SYCL_DEBUG("[SYCL][OP] call %s:", func.c_str());`
`532`	`533`	`debug_print_tensor(" dst", dst);`
`533`	`534`	`if (dst) {`
`534`	`535`	`for (std::size_t i = 0; i < num_src; ++i) {`
`@@ -539,7 +540,7 @@ struct scope_op_debug_print {`
`539`	`540`	`}`
`540`	`541`
`541`	`542`	`~scope_op_debug_print() {`
`542`		`- GGML_SYCL_DEBUG("call %s done\n", func.c_str());`
	`543`	`+ GGML_SYCL_DEBUG("[SYCL][OP] call %s done\n", func.c_str());`
`543`	`544`	`}`
`544`	`545`
`545`	`546`	`private:`