22
33#define GGML_COMMON_DECL_C
44#include "ggml-common.h"
5-
65#include "ggml.h"
76
8- // les definitions / converstion FP8 <=> FP32
97#ifdef __cplusplus
108extern "C" {
119#endif
@@ -14,28 +12,28 @@ extern "C" {
1412 typedef struct { uint8_t bits ; } ggml_e4m3_t ;
1513 typedef struct { uint8_t bits ; } ggml_e3m4_t ;
1614
17- void ggml_e5m2_to_fp32_row (const ggml_e5m2_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
18- void ggml_fp32_to_e5m2_row (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
19- void ggml_fp32_to_e5m2_row_ref (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
15+ GGML_API void ggml_e5m2_to_fp32_row (const ggml_e5m2_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
16+ GGML_API void ggml_fp32_to_e5m2_row (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
17+ GGML_API void ggml_fp32_to_e5m2_row_ref (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
2018
21- void ggml_e4m3_to_fp32_row (const ggml_e4m3_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
22- void ggml_fp32_to_e4m3_row (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
23- void ggml_fp32_to_e4m3_row_ref (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
19+ GGML_API void ggml_e4m3_to_fp32_row (const ggml_e4m3_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
20+ GGML_API void ggml_fp32_to_e4m3_row (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
21+ GGML_API void ggml_fp32_to_e4m3_row_ref (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
2422
25- void dequantize_row_e4m3_q (const block_e4m3_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
26- void quantize_row_e4m3_q (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
27- void quantize_row_e4m3_q_ref (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
23+ GGML_API void dequantize_row_e4m3_q (const block_e4m3_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
24+ GGML_API void quantize_row_e4m3_q (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
25+ GGML_API void quantize_row_e4m3_q_ref (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
2826
29- void dequantize_row_e3m4_q (const block_e3m4_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
30- void quantize_row_e3m4_q (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
31- void quantize_row_e3m4_q_ref (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
27+ GGML_API void dequantize_row_e3m4_q (const block_e3m4_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
28+ GGML_API void quantize_row_e3m4_q (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
29+ GGML_API void quantize_row_e3m4_q_ref (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
3230
3331 // TODO: the best choice depends on the CPU: fp32 / bf16 / fp16
3432#define GGML_FP8_VECT_DOT_TYPE GGML_TYPE_F32
35- void ggml_vec_dot_e5m2 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e5m2_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
36- void ggml_vec_dot_e4m3 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e4m3_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
37- void ggml_vec_dot_e4m3_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e4m3_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
38- void ggml_vec_dot_e3m4_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e3m4_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
33+ GGML_API void ggml_vec_dot_e5m2 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e5m2_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
34+ GGML_API void ggml_vec_dot_e4m3 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e4m3_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
35+ GGML_API void ggml_vec_dot_e4m3_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e4m3_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
36+ GGML_API void ggml_vec_dot_e3m4_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e3m4_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
3937
4038#ifdef __cplusplus
4139}
0 commit comments