Skip to content

Commit 7c859d0

Browse files
committed
Remove unused parameter after refactoring q4_k
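The `nblocks` parameter can be removed from the `reorder_vec_dot_q_sycl` call by using the `d_offset` pair: for Q4_K, `get_d_offset` now returns the scales offset in `first` and the `dm` offset in `second`, so the callee no longer needs `nblocks` to locate them. Signed-off-by: nscipione <[email protected]>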
1 parent f66d799 commit 7c859d0

File tree

3 files changed

+9
-11
lines changed

3 files changed

+9
-11
lines changed

ggml/src/ggml-sycl/mmvq.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r
3131

3232
float partial_sum = 0.0f;
3333
for (int i = sg.get_local_linear_id() / block_elements_per_subgroup; i < blocks_per_row; i += blocks_per_subgroup) {
34-
const int ibx = row * blocks_per_row + i; // x block index
34+
const int ibx = row * blocks_per_row + i; // x block index
3535

3636
const auto bx_offset = block_type::get_block_offset(ibx, nblocks);
3737
const auto d_offset = block_type::get_d_offset(nrows, ncols, ibx);
@@ -45,7 +45,7 @@ static void mul_mat_vec_q_reorder(const void * __restrict__ vx, const void * __r
4545
// x block quant index when casting the quants to int
4646
const int iqs = elem + block_traits::vdr_mmvq * (sg.get_local_linear_id() % block_elements_per_subgroup);
4747

48-
partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs, nblocks);
48+
partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs);
4949
}
5050
}
5151

ggml/src/ggml-sycl/quants.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,13 @@ template <> struct block_q_t<GGML_TYPE_Q4_K> {
7272

7373
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
7474
auto nblocks = (nrows * (ncols / traits::qk));
75-
return { (nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2)), 0 };
75+
return { nblocks * (QK_K / 2),
76+
(nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2)) };
7677
}
7778

7879
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
7980

8081
constexpr size_t get_total_qs_bytes(int nblocks) { return nblocks * QK_K / 2; }
81-
82-
constexpr int get_dm_offset(int nblocks) { return get_total_qs_bytes(nblocks) + nblocks * K_SCALE_SIZE; }
8382
};
8483

8584
template <> struct block_q_t<GGML_TYPE_Q6_K> {

ggml/src/ggml-sycl/vecdotq.hpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0> {
286286

287287
__dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
288288
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr,
289-
const sycl::half2 * q8_1_ds, const int & iqs, int /* nblocks */) {
289+
const sycl::half2 * q8_1_ds, const int & iqs) {
290290
const uint8_t * bq4_0 = static_cast<const uint8_t *>(vbq) + ibx_offset.first;
291291
const ggml_half d = *(reinterpret_cast<const ggml_half *>(static_cast<const uint8_t *>(vbq) + d_offset.first));
292292
int v[q4_0_traits::vdr_mmvq];
@@ -349,14 +349,13 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K> {
349349

350350
__dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
351351
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr,
352-
const sycl::half2 * q8_1_ds, const int & iqs, int nblocks) {
352+
const sycl::half2 * q8_1_ds, const int & iqs) {
353353
const int ib = ibx_offset.first / (QK_K / 2);
354354

355355
const uint8_t * base = static_cast<const uint8_t *>(vbq);
356356
const uint8_t * qs = base + ibx_offset.first;
357-
const int total_qs_bytes = nblocks * (QK_K / 2);
358-
const uint8_t * scs = base + total_qs_bytes + ib * K_SCALE_SIZE;
359-
const ggml_half2 * dms = reinterpret_cast<const ggml_half2 *>(base + d_offset.first);
357+
const uint8_t * scs = base + d_offset.first + ib * K_SCALE_SIZE;
358+
const ggml_half2 * dms = reinterpret_cast<const ggml_half2 *>(base + d_offset.second);
360359

361360
const int bq8_offset = QR4_K * ((iqs / 2) / (QI8_1 / 2));
362361
const int * q4 = (const int *) (qs + 16 * bq8_offset + 4 * ((iqs / 2) % 4));
@@ -427,7 +426,7 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K> {
427426

428427
float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
429428
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr, const sycl::half2 * q8_1_ds,
430-
const int & iqs, int /* nblocks */) {
429+
const int & iqs) {
431430
const int ib = ibx_offset.first / (QK_K / 2);
432431

433432
const uint8_t * base = static_cast<const uint8_t *>(vbq);

0 commit comments

Comments
 (0)