@@ -48,11 +48,11 @@ template <> struct block_q_t<GGML_TYPE_Q4_0> {
4848 };
4949
5050 static constexpr std::pair<int , int > get_block_offset (const int block_index, const int /* nblocks */ ) {
51- return { block_index * (traits::qk / traits::qr ), 0 };
51+ return { block_index * (QK4_0 / QR4_0 ), 0 };
5252 }
5353
5454 static constexpr std::pair<int , int > get_d_offset (int nrows, int ncols, const int block_index) {
55- return { (ncols / traits::qr * nrows) + block_index * sizeof (ggml_half), 0 };
55+ return { (ncols / QR4_0 * nrows) + block_index * sizeof (ggml_half), 0 };
5656 }
5757
5858 static constexpr int block_to_q8_1_ratio () { return traits::qk / QK8_1; }
@@ -71,14 +71,12 @@ template <> struct block_q_t<GGML_TYPE_Q4_K> {
7171 }
7272
7373 static constexpr std::pair<int , int > get_d_offset (int nrows, int ncols, const int block_index) {
74- auto nblocks = (nrows * (ncols / traits::qk ));
75- return { nblocks * (QK_K / 2 ),
74+ auto nblocks = (nrows * (ncols / QK_K ));
75+ return { nblocks * (QK_K / 2 ) + (block_index * K_SCALE_SIZE) ,
7676 (nblocks * QK_K / 2 ) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof (ggml_half2)) };
7777 }
7878
7979 static constexpr int block_to_q8_1_ratio () { return traits::qk / QK8_1; }
80-
81- constexpr size_t get_total_qs_bytes (int nblocks) { return nblocks * QK_K / 2 ; }
8280};
8381
8482template <> struct block_q_t <GGML_TYPE_Q6_K> {
@@ -90,22 +88,23 @@ template <> struct block_q_t<GGML_TYPE_Q6_K> {
9088 };
9189
9290 static constexpr std::pair<int , int > get_block_offset (const int block_index, const int n_blocks) {
93- auto low_bits_index = block_index * (traits::qk / traits::qr );
91+ auto low_bits_index = block_index * (QK_K / QR6_K );
9492 // the index of high bits it's after all low bits
9593 auto high_bits_index = n_blocks * (QK_K / 2 ) + (block_index * (QK_K / 4 ));
9694 return { low_bits_index, high_bits_index };
9795 }
9896
9997 static constexpr std::pair<int , int > get_d_offset (int nrows, int ncols, const int block_index) {
100- auto nblocks = (nrows * (ncols / traits::qk ));
98+ auto nblocks = (nrows * (ncols / QK_K ));
10199 auto total_qs_bytes = nblocks * (QK_K / 2 ) + nblocks * (QK_K / 4 );
102100 auto block_scales = total_qs_bytes + block_index * (QK_K / 16 );
103- auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16 );
101+ auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16 ) + block_index * sizeof (ggml_half) ;
104102 return { block_scales, sb_scale };
105103 }
106104
107105 static constexpr int block_to_q8_1_ratio () { return traits::qk / QK8_1; }
108106};
107+
109108} // namespace ggml_sycl_reordered
110109
111110#endif // GGML_SYCL_QUANTS_HPP
0 commit comments