1919
2020namespace  ggml_sycl_reordered  {
2121
22+ 
23+ //  The reordered block moves quants (qs) and scales (d) into two
24+ //  uniform regions of memory that are contiguous within the same tensor.
25+ //  What this means is that instead of having:
26+ //  [d0, qs0] [d1, qs1] [d2, qs2] ... [dN, qsN]
27+ //  We have:
28+ //  [qs0, qs1, qs2, ..., qsN]  [d0, d1, d2, ..., dN]
29+ // 
30+ //  Notes: out-of-bounds qs will run into d values
31+ //  Alignment relies on the allocated size of qs
32+ 
2233template  <ggml_type type> struct  block_q_t ;
2334
35+ 
36+ //  qk number of weights / quants in a block
37+ //  qr number of weights in a byte (described as 'before dequantization')
38+ //     for quantization types that have low and high bits split, qr is calculated
39+ //     using the lower bits, e.g. for Q6 quants QR6 is 2
40+ //  qi size of a block in 32 bit integers
41+ //  See ggml-common.h to see how these are calculated
2442template  <> struct  block_q_t <GGML_TYPE_Q4_0> {
2543    struct  traits  {
2644        static  constexpr  uint32_t  qk       = QK4_0;
@@ -29,11 +47,6 @@ template <> struct block_q_t<GGML_TYPE_Q4_0> {
2947        static  constexpr  uint32_t  vdr_mmvq = 2 ;
3048    };
3149
32-     //  qs and d are expected to be contiguous in memory
33-     //  out-of-bounds qs will access d values
34-     //  Aligment relies on the allocated size of qs, so other block types
35-     //  may require padding
36- 
3750    static  constexpr  int  get_block_offset (const  int  block_index) { return  block_index * (traits::qk / traits::qr); }
3851
3952    static  constexpr  int  get_d_offset (int  nrows, int  ncols, const  int  block_index) {
0 commit comments