diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp
index 4d2f5fbfe96e3..531aca3118a8d 100644
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@@ -23,7 +23,7 @@ void* ggml_sycl_host_malloc(size_t size) try {
   if (getenv("GGML_SYCL_NO_PINNED") != nullptr) {
     return nullptr;
   }
-//   ggml_sycl_info().device_mgr->first_queue
+
   void* ptr = nullptr;
   // allow to use dpct::get_in_order_queue() for host malloc
   auto q = dpct::get_in_order_queue();
@@ -32,7 +32,6 @@ void* ggml_sycl_host_malloc(size_t size) try {
   dpct::err0 err = CHECK_TRY_ERROR(
       ptr = (void*)sycl::malloc_host(size, q));
 
-//  printf("zjy ggml_sycl_host_malloc ptr=%p queue=%p size=%lu \n", ptr,q, size);
   if (err != 0) {
     // clear the error
     GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0,    "syclGetErrorString is not supported");
@@ -113,7 +112,6 @@ void print_device_opt_feature(ggml_sycl_device_info &info) {
     int device_count = info.device_count;
 
     for (int id = 0; id < device_count; ++id) {
-        printf("zjy id=%d\n", id);
         sycl::device device = dpct::dev_mgr::instance().get_device(id);
         std::string backend_type = get_device_backend_and_type(device);
         int type_id = DeviceNums[backend_type]++;
diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp
index 241a2cc6e8679..4a14f258f9312 100644
--- a/ggml/src/ggml-sycl/dmmv.cpp
+++ b/ggml/src/ggml-sycl/dmmv.cpp
@@ -77,8 +77,10 @@ static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat *
     }
 
     // sum up partial sums and write back result
+   const int mask_start = ncols > GGML_SYCL_DMMV_X ? warp_size >> 1 : warp_size >> 2;
+
 #pragma unroll
-    for (int mask = warp_size / 2; mask > 0; mask >>= 1) {
+    for (int mask = mask_start; mask > 0; mask >>= 1) {
         tmp +=
             dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), tmp, mask);
     }