1414#ifndef GGML_SYCL_QUANTS_HPP
1515#define GGML_SYCL_QUANTS_HPP
1616
17+ #include < utility>
18+
1719#include " ggml-common.h"
1820#include " ggml.h"
1921
2022namespace ggml_sycl_reordered {
2123
22-
// The reordered block moves the quants (qs) and the scales (d) into two
// uniform regions of memory that are contiguous within the same tensor.
// What this means is that instead of having:
@@ -32,7 +33,6 @@ namespace ggml_sycl_reordered {
3233
3334template <ggml_type type> struct block_q_t ;
3435
35-
// qk number of weights / quants in a block
// qr number of weights in a byte (described as 'before dequantization')
// for quantization types that have low and high bits split, qr is calculated with
@@ -47,10 +47,12 @@ template <> struct block_q_t<GGML_TYPE_Q4_0> {
4747 static constexpr uint32_t vdr_mmvq = 2 ;
4848 };
4949
50- static constexpr int get_block_offset (const int block_index) { return block_index * (traits::qk / traits::qr); }
50+ static constexpr std::pair<int , int > get_block_offset (const int block_index, const int /* nblocks */ ) {
51+ return { block_index * (traits::qk / traits::qr), 0 };
52+ }
5153
52- static constexpr int get_d_offset (int nrows, int ncols, const int block_index) {
53- return (ncols / traits::qr * nrows) + block_index * sizeof (ggml_half);
54+ static constexpr std::pair< int , int > get_d_offset (int nrows, int ncols, const int block_index) {
55+ return { (ncols / traits::qr * nrows) + block_index * sizeof (ggml_half), 0 } ;
5456 }
5557
5658 static constexpr int block_to_q8_1_ratio () { return traits::qk / QK8_1; }
@@ -64,20 +66,47 @@ template <> struct block_q_t<GGML_TYPE_Q4_K> {
6466 static constexpr uint32_t vdr_mmvq = 2 ;
6567 };
6668
67- static constexpr int get_block_offset (const int block_index) { return block_index * (traits::qk / traits::qr); }
69+ static constexpr std::pair<int , int > get_block_offset (const int block_index, const int /* nblocks */ ) {
70+ return { block_index * (traits::qk / traits::qr), 0 };
71+ }
6872
69- static constexpr int get_d_offset (int nrows, int ncols, const int block_index) {
73+ static constexpr std::pair< int , int > get_d_offset (int nrows, int ncols, const int block_index) {
7074 auto nblocks = (nrows * (ncols / traits::qk));
71- return (nblocks * QK_K / 2 ) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof (ggml_half2));
75+ return { (nblocks * QK_K / 2 ) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof (ggml_half2)), 0 } ;
7276 }
7377
7478 static constexpr int block_to_q8_1_ratio () { return traits::qk / QK8_1; }
7579
7680 constexpr size_t get_total_qs_bytes (int nblocks) { return nblocks * QK_K / 2 ; }
7781
78- constexpr size_t get_dm_offset (int nblocks) { return get_total_qs_bytes (nblocks) + nblocks * K_SCALE_SIZE; }
82+ constexpr int get_dm_offset (int nblocks) { return get_total_qs_bytes (nblocks) + nblocks * K_SCALE_SIZE; }
7983};
8084
85+ template <> struct block_q_t <GGML_TYPE_Q6_K> {
86+ struct traits {
87+ static constexpr uint32_t qk = QK_K;
88+ static constexpr uint32_t qi = QI6_K;
89+ static constexpr uint32_t qr = QR6_K;
90+ static constexpr uint32_t vdr_mmvq = 1 ;
91+ };
92+
93+ static constexpr std::pair<int , int > get_block_offset (const int block_index, const int n_blocks) {
94+ auto low_bits_index = block_index * (traits::qk / traits::qr);
95+ // the index of high bits it's after all low bits
96+ auto high_bits_index = n_blocks * (QK_K / 2 ) + (block_index * (QK_K / 4 ));
97+ return { low_bits_index, high_bits_index };
98+ }
99+
100+ static constexpr std::pair<int , int > get_d_offset (int nrows, int ncols, const int block_index) {
101+ auto nblocks = (nrows * (ncols / traits::qk));
102+ auto total_qs_bytes = nblocks * (QK_K / 2 ) + nblocks * (QK_K / 4 );
103+ auto block_scales = total_qs_bytes + block_index * (QK_K / 16 );
104+ auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16 );
105+ return { block_scales, sb_scale };
106+ }
107+
108+ static constexpr int block_to_q8_1_ratio () { return traits::qk / QK8_1; }
109+ };
81110} // namespace ggml_sycl_reordered
82111
83112#endif // GGML_SYCL_QUANTS_HPP
0 commit comments