Skip to content

Commit 52b1622

Browse files
committed
sycl : Improved comments in the quants header
Signed-off-by: Alberto Cabrera <[email protected]>
1 parent 9c8d809 commit 52b1622

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

ggml/src/ggml-sycl/quants.hpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,26 @@
1919

2020
namespace ggml_sycl_reordered {
2121

22+
23+
// The reordered block moves quants (qs) and scales (d) to two
24+
// uniform regions of memory that are contiguous in the same tensor.
25+
// What this means is that instead of having:
26+
// [d0, qs0] [d1, qs1] [d2, qs2] ... [dN, qsN]
27+
// We have:
28+
// [qs0, qs1, qs2, ..., qsN] [d0, d1, d2, ..., dN]
29+
//
30+
// Notes: out-of-bounds qs will run into d values
31+
//        Alignment relies on the allocated size of qs
32+
2233
template <ggml_type type> struct block_q_t;
2334

35+
36+
// qk number of weights / quants in a block
37+
// qr number of weights in a byte (described as 'before dequantization')
38+
//    for quantization types that have low and high bits split, qr is calculated
39+
//    using the lower bits, e.g. for Q6 quants, QR6 is 2
40+
// qi size of a block in 32 bit integers
41+
// See ggml-common.h to see how these are calculated
2442
template <> struct block_q_t<GGML_TYPE_Q4_0> {
2543
struct traits {
2644
static constexpr uint32_t qk = QK4_0;
@@ -29,11 +47,6 @@ template <> struct block_q_t<GGML_TYPE_Q4_0> {
2947
static constexpr uint32_t vdr_mmvq = 2;
3048
};
3149

32-
// qs and d are expected to be contiguous in memory
33-
// out-of-bounds qs will access d values
34-
// Alignment relies on the allocated size of qs, so other block types
35-
// may require padding
36-
3750
// Offset of the block at `block_index`, computed as the block index scaled by
// traits::qk / traits::qr (weights per block divided by weights per byte).
static constexpr int get_block_offset(const int block_index) {
    // Per-block stride; `auto` keeps the exact type of the traits arithmetic.
    constexpr auto block_stride = traits::qk / traits::qr;
    return block_index * block_stride;
}
3851

3952
static constexpr int get_d_offset(int nrows, int ncols, const int block_index) {

0 commit comments

Comments
 (0)