Remove unused parameter after refactoring q4_k

s-Nick · s-Nick · commit 7c859d08d541 · 2025-06-05T09:54:01.000+01:00
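The `nblocks` parameter can be removed from the `reorder_vec_dot_q_sycl` call:
for Q4_K, the `d_offset` pair now carries both the scales offset (`first`) and
the `dm` offset (`second`), so neither has to be recomputed from `nblocks`.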

Signed-off-by: nscipione <nicolo.scipione@codeplay.com>
diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml/src/ggml-sycl/mmvq.cpp
@@ -31,7 +31,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r
 
     float partial_sum = 0.0f;
     for (int i = sg.get_local_linear_id() / block_elements_per_subgroup; i < blocks_per_row; i += blocks_per_subgroup) {
-        const int ibx       = row * blocks_per_row + i;  // x block index
+        const int ibx = row * blocks_per_row + i;  // x block index
 
         const auto         bx_offset      = block_type::get_block_offset(ibx, nblocks);
         const auto         d_offset       = block_type::get_d_offset(nrows, ncols, ibx);
@@ -45,7 +45,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r
             // x block quant index when casting the quants to int
             const int iqs = elem + block_traits::vdr_mmvq * (sg.get_local_linear_id() % block_elements_per_subgroup);
 
-            partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs, nblocks);
+            partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs);
         }
     }
 
diff --git a/ggml/src/ggml-sycl/quants.hpp b/ggml/src/ggml-sycl/quants.hpp
@@ -72,14 +72,13 @@ template <> struct block_q_t<GGML_TYPE_Q4_K> {
 
     static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
         auto nblocks = (nrows * (ncols / traits::qk));
-        return { (nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2)), 0 };
+        return { nblocks * (QK_K / 2),
+                 (nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2)) };
     }
 
     static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
 
     constexpr size_t get_total_qs_bytes(int nblocks) { return nblocks * QK_K / 2; }
-
-    constexpr int get_dm_offset(int nblocks) { return get_total_qs_bytes(nblocks) + nblocks * K_SCALE_SIZE; }
 };
 
 template <> struct block_q_t<GGML_TYPE_Q6_K> {
diff --git a/ggml/src/ggml-sycl/vecdotq.hpp b/ggml/src/ggml-sycl/vecdotq.hpp
@@ -286,7 +286,7 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0> {
 
     __dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
                                      const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr,
-                                     const sycl::half2 * q8_1_ds, const int & iqs, int /* nblocks */) {
+                                     const sycl::half2 * q8_1_ds, const int & iqs) {
         const uint8_t * bq4_0 = static_cast<const uint8_t *>(vbq) + ibx_offset.first;
         const ggml_half d = *(reinterpret_cast<const ggml_half *>(static_cast<const uint8_t *>(vbq) + d_offset.first));
         int             v[q4_0_traits::vdr_mmvq];
@@ -349,14 +349,13 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K> {
 
     __dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
                                      const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr,
-                                     const sycl::half2 * q8_1_ds, const int & iqs, int nblocks) {
+                                     const sycl::half2 * q8_1_ds, const int & iqs) {
         const int ib = ibx_offset.first / (QK_K / 2);
 
         const uint8_t *    base           = static_cast<const uint8_t *>(vbq);
         const uint8_t *    qs             = base + ibx_offset.first;
-        const int          total_qs_bytes = nblocks * (QK_K / 2);
-        const uint8_t *    scs            = base + total_qs_bytes + ib * K_SCALE_SIZE;
-        const ggml_half2 * dms            = reinterpret_cast<const ggml_half2 *>(base + d_offset.first);
+        const uint8_t *    scs            = base + d_offset.first + ib * K_SCALE_SIZE;
+        const ggml_half2 * dms            = reinterpret_cast<const ggml_half2 *>(base + d_offset.second);
 
         const int        bq8_offset = QR4_K * ((iqs / 2) / (QI8_1 / 2));
         const int *      q4         = (const int *) (qs + 16 * bq8_offset + 4 * ((iqs / 2) % 4));
@@ -427,7 +426,7 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K> {
 
     float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
                      const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr, const sycl::half2 * q8_1_ds,
-                     const int & iqs, int /* nblocks */) {
+                     const int & iqs) {
         const int ib = ibx_offset.first / (QK_K / 2);
 
         const uint8_t *   base   = static_cast<const uint8_t *>(vbq);

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r`
`31`	`31`
`32`	`32`	`float partial_sum = 0.0f;`
`33`	`33`	`for (int i = sg.get_local_linear_id() / block_elements_per_subgroup; i < blocks_per_row; i += blocks_per_subgroup) {`
`34`		`- const int ibx = row * blocks_per_row + i; // x block index`
	`34`	`+ const int ibx = row * blocks_per_row + i; // x block index`
`35`	`35`
`36`	`36`	`const auto bx_offset = block_type::get_block_offset(ibx, nblocks);`
`37`	`37`	`const auto d_offset = block_type::get_d_offset(nrows, ncols, ibx);`
`@@ -45,7 +45,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r`
`45`	`45`	`// x block quant index when casting the quants to int`
`46`	`46`	`const int iqs = elem + block_traits::vdr_mmvq * (sg.get_local_linear_id() % block_elements_per_subgroup);`
`47`	`47`
`48`		`- partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs, nblocks);`
	`48`	`+ partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs);`
`49`	`49`	`}`
`50`	`50`	`}`
`51`	`51`