66typedef uint16_t ggml_half;
77typedef uint32_t ggml_half2;
88
9- #define GGML_COMMON_AGGR
9+ #define GGML_COMMON_AGGR_U
10+ #define GGML_COMMON_AGGR_S
11+
12+ #define GGML_COMMON_DECL
13+ #elif defined(GGML_COMMON_DECL_CPP)
14+ #include < cstdint>
15+
16+ typedef uint16_t ggml_half;
17+ typedef uint32_t ggml_half2;
18+
19+ // standard C++ allows anonymous unions, but some compilers warn about them
20+ #define GGML_COMMON_AGGR_U data
21+ // standard C++ does not allow anonymous structs, so the struct member must be named.
22+ #define GGML_COMMON_AGGR_S data
1023
1124#define GGML_COMMON_DECL
1225#elif defined(GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t ggml_half2;
1528typedef half ggml_half;
1629typedef half2 ggml_half2;
1730
18- #define GGML_COMMON_AGGR
31+ #define GGML_COMMON_AGGR_U
32+ #define GGML_COMMON_AGGR_S
1933
2034#define GGML_COMMON_DECL
2135#elif defined(GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 ggml_half2;
2943typedef half ggml_half;
3044typedef half2 ggml_half2;
3145
32- #define GGML_COMMON_AGGR data
46+ #define GGML_COMMON_AGGR_U
47+ #define GGML_COMMON_AGGR_S data
3348
3449#define GGML_COMMON_DECL
3550#elif defined(GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 ggml_half2;
3954typedef half ggml_half;
4055typedef half2 ggml_half2;
4156
42- #define GGML_COMMON_AGGR data
57+ #define GGML_COMMON_AGGR_U
58+ #define GGML_COMMON_AGGR_S data
4359
4460#define GGML_COMMON_DECL
4561#elif defined(GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 ggml_half2;
4965typedef sycl::half ggml_half;
5066typedef sycl::half2 ggml_half2;
5167
52- #define GGML_COMMON_AGGR data
68+ #define GGML_COMMON_AGGR_U
69+ #define GGML_COMMON_AGGR_S data
5370
5471#define GGML_COMMON_DECL
5572#endif
@@ -154,9 +171,9 @@ typedef struct {
154171 struct {
155172 ggml_half d; // delta
156173 ggml_half m; // min
157- } GGML_COMMON_AGGR ;
174+ } GGML_COMMON_AGGR_S ;
158175 ggml_half2 dm;
159- };
176+ } GGML_COMMON_AGGR_U ;
160177 uint8_t qs[QK4_1 / 2 ]; // nibbles / quants
161178} block_q4_1;
162179static_assert (sizeof (block_q4_1) == 2 * sizeof (ggml_half) + QK4_1 / 2 , " wrong q4_1 block size/padding" );
@@ -175,9 +192,9 @@ typedef struct {
175192 struct {
176193 ggml_half d; // delta
177194 ggml_half m; // min
178- } GGML_COMMON_AGGR ;
195+ } GGML_COMMON_AGGR_S ;
179196 ggml_half2 dm;
180- };
197+ } GGML_COMMON_AGGR_U ;
181198 uint8_t qh[4 ]; // 5-th bit of quants
182199 uint8_t qs[QK5_1 / 2 ]; // nibbles / quants
183200} block_q5_1;
@@ -196,9 +213,9 @@ typedef struct {
196213 struct {
197214 ggml_half d; // delta
198215 ggml_half s; // d * sum(qs[i])
199- } GGML_COMMON_AGGR ;
216+ } GGML_COMMON_AGGR_S ;
200217 ggml_half2 ds;
201- };
218+ } GGML_COMMON_AGGR_U ;
202219 int8_t qs[QK8_1]; // quants
203220} block_q8_1;
204221static_assert (sizeof (block_q8_1) == 2 *sizeof (ggml_half) + QK8_1, " wrong q8_1 block size/padding" );
@@ -261,9 +278,9 @@ typedef struct {
261278 struct {
262279 ggml_half d; // super-block scale for quantized scales
263280 ggml_half dmin; // super-block scale for quantized mins
264- } GGML_COMMON_AGGR ;
281+ } GGML_COMMON_AGGR_S ;
265282 ggml_half2 dm;
266- };
283+ } GGML_COMMON_AGGR_U ;
267284} block_q2_K;
268285static_assert (sizeof (block_q2_K) == 2 *sizeof (ggml_half) + QK_K/16 + QK_K/4 , " wrong q2_K block size/padding" );
269286
@@ -288,9 +305,9 @@ typedef struct {
288305 struct {
289306 ggml_half d; // super-block scale for quantized scales
290307 ggml_half dmin; // super-block scale for quantized mins
291- } GGML_COMMON_AGGR ;
308+ } GGML_COMMON_AGGR_S ;
292309 ggml_half2 dm;
293- };
310+ } GGML_COMMON_AGGR_U ;
294311 uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295312 uint8_t qs[QK_K/2 ]; // 4--bit quants
296313} block_q4_K;
@@ -305,9 +322,9 @@ typedef struct {
305322 struct {
306323 ggml_half d; // super-block scale for quantized scales
307324 ggml_half dmin; // super-block scale for quantized mins
308- } GGML_COMMON_AGGR ;
325+ } GGML_COMMON_AGGR_S ;
309326 ggml_half2 dm;
310- };
327+ } GGML_COMMON_AGGR_U ;
311328 uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312329 uint8_t qh[QK_K/8 ]; // quants, high bit
313330 uint8_t qs[QK_K/2 ]; // quants, low 4 bits
@@ -424,6 +441,24 @@ typedef struct {
424441} block_iq4_nlx4;
425442static_assert (sizeof (block_iq4_nlx4) == 4 * sizeof (ggml_half) + QK4_NL * 2 , " wrong iq4_nlx4 block size/padding" );
426443
444+ // fp8 support
445+ // - fp8 simple types: each wraps one raw byte (`bits`) in its own struct type, so the two formats cannot be mixed up implicitly
446+ typedef struct { uint8_t bits; } ggml_e5m2_t ; // presumably 5 exponent / 2 mantissa bits, going by the name - TODO confirm
447+ typedef struct { uint8_t bits; } ggml_e4m3_t ; // presumably 4 exponent / 3 mantissa bits, going by the name - TODO confirm
448+
449+ // - fp8 with block delta => 8.125 bpw
450+ typedef struct { // one float delta followed by QK_K one-byte quants
451+ float d; // delta (NOTE(review): presumably the scale shared by every quant in this block - confirm against the (de)quantization code)
452+ uint8_t qs[QK_K]; // QK_K quants, one byte each (presumably e4m3-encoded, going by the type name - confirm)
453+ } block_e4m3_q;
454+ static_assert (sizeof (block_e4m3_q) == sizeof (float ) + QK_K, " wrong block_e4m3_q block size/padding" ); // layout check: no padding between or after the two members
455+
456+ typedef struct { // one float delta followed by QK_K one-byte quants
457+ float d; // delta (NOTE(review): presumably the scale shared by every quant in this block - confirm against the (de)quantization code)
458+ uint8_t qs[QK_K]; // QK_K quants, one byte each (presumably e3m4-encoded, going by the type name; no matching ggml_e3m4_t simple type is visible here - confirm)
459+ } block_e3m4_q;
460+ static_assert (sizeof (block_e3m4_q) == sizeof (float ) + QK_K, " wrong block_e3m4_q block size/padding" ); // layout check: no padding between or after the two members
461+
427462#endif // GGML_COMMON_DECL
428463#endif // GGML_COMMON_DECL
429464
@@ -437,6 +472,13 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wro
437472#define GGML_TABLE_BEGIN (type, name, size ) static const type name[size] = {
438473#define GGML_TABLE_END () };
439474
475+ #define GGML_COMMON_IMPL
476+ #elif defined(GGML_COMMON_IMPL_CPP)
477+ #include < cstdint>
478+
479+ #define GGML_TABLE_BEGIN (type, name, size ) static const type name[size] = {
480+ #define GGML_TABLE_END () };
481+
440482#define GGML_COMMON_IMPL
441483#elif defined(GGML_COMMON_IMPL_METAL)
442484#include < metal_stdlib>
0 commit comments