Skip to content

Commit 4248764

Browse files
committed
cmake build update
and correct warning
1 parent a8e7e6f commit 4248764

File tree

7 files changed

+64
-26
lines changed

7 files changed

+64
-26
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ if (NOT DEFINED GGML_LLAMAFILE)
8888
set(GGML_LLAMAFILE_DEFAULT ON)
8989
endif()
9090

91+
# Default the OpenMP SIMD feature to ON, but only when the caller has not
# already defined GGML_OPENMP_SIMD (e.g. with -DGGML_OPENMP_SIMD=... on the
# command line). Only the *_DEFAULT helper variable is set here.
if (NOT DEFINED GGML_OPENMP_SIMD)
92+
set(GGML_OPENMP_SIMD_DEFAULT ON)
93+
endif()
94+
9195
if (NOT DEFINED GGML_AMX)
9296
set(GGML_AMX ON)
9397
endif()

ggml/CMakeLists.txt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ if (NOT GGML_LLAMAFILE_DEFAULT)
6161
set(GGML_LLAMAFILE_DEFAULT OFF)
6262
endif()
6363

64+
# Normalise the default: if GGML_OPENMP_SIMD_DEFAULT is unset or evaluates to
# false, pin it to an explicit OFF so it always holds a boolean value.
if (NOT GGML_OPENMP_SIMD_DEFAULT)
65+
set(GGML_OPENMP_SIMD_DEFAULT OFF)
66+
endif()
67+
6468
if (NOT GGML_CUDA_GRAPHS_DEFAULT)
6569
set(GGML_CUDA_GRAPHS_DEFAULT OFF)
6670
endif()
@@ -109,6 +113,7 @@ endif()
109113
option(GGML_LASX "ggml: enable lasx" ON)
110114
option(GGML_LSX "ggml: enable lsx" ON)
111115
option(GGML_SVE "ggml: enable SVE" OFF)
116+
option(GGML_OPENMP_SIMD "ggml: enable OPENMP_SIMD" ${GGML_OPENMP_SIMD_DEFAULT})
112117

113118
if (WIN32)
114119
set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
@@ -178,11 +183,11 @@ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
178183
set(CMAKE_C_STANDARD 11)
179184
set(CMAKE_C_STANDARD_REQUIRED true)
180185

181-
if (GGML_SYCL)
186+
#if (GGML_SYCL)
182187
set(CMAKE_CXX_STANDARD 17)
183-
else()
184-
set(CMAKE_CXX_STANDARD 11)
185-
endif()
188+
#else()
189+
# set(CMAKE_CXX_STANDARD 11)
190+
#endif()
186191
set(CMAKE_CXX_STANDARD_REQUIRED true)
187192

188193
set(THREADS_PREFER_PTHREAD_FLAG ON)

ggml/include/ggml.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ extern "C" {
394394
GGML_TYPE_E4M3 = 37,
395395
GGML_TYPE_E4M3_Q = 38,
396396
GGML_TYPE_E3M4_Q = 39,
397-
// E5M6 => 12 bits vs 16 bits for BF16 = E8M7 / FP16 = E5M10
397+
// E5M6 => 12 bits vs 16 bits for BF16 = E8M7 / FP16 = E5M10
398398
GGML_TYPE_COUNT,
399399
};
400400

ggml/src/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,16 @@ if (GGML_OPENMP)
171171
endif()
172172
endif()
173173

174+
# OpenMP SIMD: honour `#pragma omp simd` without pulling in the OpenMP runtime.
# The GCC/Clang spelling of the flag is "-fopenmp-simd" (hyphen, not
# underscore); with the wrong spelling the probe below can never succeed.
# When the compiler rejects the flag, configuration stops with a fatal error;
# configure with -DGGML_OPENMP_SIMD=OFF to build without it.
if (GGML_OPENMP_SIMD)
175+
check_cxx_compiler_flag("-fopenmp-simd" SUPPORTS_OPENMP_SIMD)
176+
if(SUPPORTS_OPENMP_SIMD)
177+
message(STATUS "Using openmp_simd.")
178+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
179+
else()
180+
# No comma after the mode keyword: "FATAL_ERROR," is not recognised as a
# mode and would be printed as part of an ordinary, non-fatal message.
message(FATAL_ERROR "C++ compiler lacks OPENMP_SIMD support.")
181+
endif()
182+
endif()
183+
174184
if (GGML_BLAS)
175185
if (GGML_STATIC)
176186
set(BLA_STATIC ON)
@@ -1360,6 +1370,10 @@ endif()
13601370
# libraries
13611371
#
13621372

1373+
# FP8
# The FP8 backend is exactly one header and one source file, so name them
# explicitly instead of globbing: a missing file then fails at configure time
# rather than silently producing an empty list. Relative paths are resolved
# against the current source directory by add_library().
1374+
set(GGML_HEADERS_FP8 ggml-fp8.h)
1375+
set(GGML_SOURCES_FP8 ggml-fp8.cpp)
1376+
13631377
# ggml
13641378

13651379
add_library(ggml
@@ -1384,6 +1398,7 @@ add_library(ggml
13841398
${GGML_SOURCES_AMX} ${GGML_HEADERS_AMX}
13851399
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
13861400
ggml-aarch64.c ggml-aarch64.h
1401+
${GGML_SOURCES_FP8} ${GGML_HEADERS_FP8}
13871402
)
13881403

13891404
if (EMSCRIPTEN)

ggml/src/ggml-common.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ typedef uint32_t ggml_half2;
88

99
#define GGML_COMMON_AGGR
1010

11+
#define GGML_COMMON_DECL
12+
#elif defined(GGML_COMMON_DECL_CPP)
13+
#include <stdint.h>
14+
15+
typedef uint16_t ggml_half;
16+
typedef uint32_t ggml_half2;
17+
18+
#define GGML_COMMON_AGGR data
19+
1120
#define GGML_COMMON_DECL
1221
#elif defined(GGML_COMMON_DECL_METAL)
1322
#include <metal_stdlib>

ggml/src/ggml-fp8.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,29 @@
11
#define GGML_COMMON_IMPL_C
2+
#define GGML_COMMON_DECL_CPP
23
#include "ggml-common.h"
3-
44
#include "ggml-fp8.h"
55

66
#include <cassert>
77

88
/*
9+
make clean
10+
make -j8
911
# ./llama-quantize --output-tensor-type fp8_e3m4_q ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
1012
./llama-quantize ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
1113
./llama-cli -c 1024 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf -p "[INST]bonjour a tu un nom. je ne sais pas comment t'appeler. Si tu n'en as pas je peux t'appeler TINTIN[/INST]" -s 42
12-
# ./llama-perplexity -f ~/LLM/wikitext-2-raw/wiki.test.raw -s 31337 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf
1314
./llama-perplexity --kl-divergence-base ~/LLM/Mistral-Nemo-Instruct-2407.BF16.kld --kl-divergence -s 31337 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf
1415
16+
rm -rf build
17+
cmake -B build
18+
cmake --build build --config Release
19+
./build/llama-quantize ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
20+
./build/llama-cli -c 1024 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf -p "[INST]bonjour a tu un nom. je ne sais pas comment t'appeler. Si tu n'en as pas je peux t'appeler TINTIN[/INST]" -s 42
21+
./build/llama-perplexity --kl-divergence-base ~/LLM/Mistral-Nemo-Instruct-2407.BF16.kld --kl-divergence -s 31337 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf
22+
1523
*/
1624

1725
#include <iostream>
1826
#include <cstdint>
19-
#include <immintrin.h>
2027

2128
template<int N> constexpr float EXP2() {
2229
if constexpr (N==0) return 1;

ggml/src/ggml-fp8.h

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22

33
#define GGML_COMMON_DECL_C
44
#include "ggml-common.h"
5-
65
#include "ggml.h"
76

8-
// les definitions / converstion FP8 <=> FP32
97
#ifdef __cplusplus
108
extern "C" {
119
#endif
@@ -14,28 +12,28 @@ extern "C" {
1412
typedef struct { uint8_t bits; } ggml_e4m3_t;
1513
typedef struct { uint8_t bits; } ggml_e3m4_t;
1614

17-
void ggml_e5m2_to_fp32_row(const ggml_e5m2_t * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
18-
void ggml_fp32_to_e5m2_row(const float * GGML_RESTRICT x, ggml_e5m2_t * GGML_RESTRICT y, int64_t k);
19-
void ggml_fp32_to_e5m2_row_ref(const float * GGML_RESTRICT x, ggml_e5m2_t * GGML_RESTRICT y, int64_t k);
15+
GGML_API void ggml_e5m2_to_fp32_row(const ggml_e5m2_t * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
16+
GGML_API void ggml_fp32_to_e5m2_row(const float * GGML_RESTRICT x, ggml_e5m2_t * GGML_RESTRICT y, int64_t k);
17+
GGML_API void ggml_fp32_to_e5m2_row_ref(const float * GGML_RESTRICT x, ggml_e5m2_t * GGML_RESTRICT y, int64_t k);
2018

21-
void ggml_e4m3_to_fp32_row(const ggml_e4m3_t * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
22-
void ggml_fp32_to_e4m3_row(const float * GGML_RESTRICT x, ggml_e4m3_t * GGML_RESTRICT y, int64_t k);
23-
void ggml_fp32_to_e4m3_row_ref(const float * GGML_RESTRICT x, ggml_e4m3_t * GGML_RESTRICT y, int64_t k);
19+
GGML_API void ggml_e4m3_to_fp32_row(const ggml_e4m3_t * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
20+
GGML_API void ggml_fp32_to_e4m3_row(const float * GGML_RESTRICT x, ggml_e4m3_t * GGML_RESTRICT y, int64_t k);
21+
GGML_API void ggml_fp32_to_e4m3_row_ref(const float * GGML_RESTRICT x, ggml_e4m3_t * GGML_RESTRICT y, int64_t k);
2422

25-
void dequantize_row_e4m3_q(const block_e4m3_q * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
26-
void quantize_row_e4m3_q(const float * GGML_RESTRICT x, block_e4m3_q * GGML_RESTRICT y, int64_t k);
27-
void quantize_row_e4m3_q_ref(const float * GGML_RESTRICT x, block_e4m3_q * GGML_RESTRICT y, int64_t k);
23+
GGML_API void dequantize_row_e4m3_q(const block_e4m3_q * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
24+
GGML_API void quantize_row_e4m3_q(const float * GGML_RESTRICT x, block_e4m3_q * GGML_RESTRICT y, int64_t k);
25+
GGML_API void quantize_row_e4m3_q_ref(const float * GGML_RESTRICT x, block_e4m3_q * GGML_RESTRICT y, int64_t k);
2826

29-
void dequantize_row_e3m4_q(const block_e3m4_q * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
30-
void quantize_row_e3m4_q(const float * GGML_RESTRICT x, block_e3m4_q * GGML_RESTRICT y, int64_t k);
31-
void quantize_row_e3m4_q_ref(const float * GGML_RESTRICT x, block_e3m4_q * GGML_RESTRICT y, int64_t k);
27+
GGML_API void dequantize_row_e3m4_q(const block_e3m4_q * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
28+
GGML_API void quantize_row_e3m4_q(const float * GGML_RESTRICT x, block_e3m4_q * GGML_RESTRICT y, int64_t k);
29+
GGML_API void quantize_row_e3m4_q_ref(const float * GGML_RESTRICT x, block_e3m4_q * GGML_RESTRICT y, int64_t k);
3230

3331
// TODO: the best type depends on the CPU: fp32 / bf16 / fp16
3432
#define GGML_FP8_VECT_DOT_TYPE GGML_TYPE_F32
35-
void ggml_vec_dot_e5m2(int n, float * GGML_RESTRICT s, size_t bs, const ggml_e5m2_t * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
36-
void ggml_vec_dot_e4m3(int n, float * GGML_RESTRICT s, size_t bs, const ggml_e4m3_t * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
37-
void ggml_vec_dot_e4m3_q(int n, float * GGML_RESTRICT s, size_t bs, const block_e4m3_q * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
38-
void ggml_vec_dot_e3m4_q(int n, float * GGML_RESTRICT s, size_t bs, const block_e3m4_q * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
33+
GGML_API void ggml_vec_dot_e5m2(int n, float * GGML_RESTRICT s, size_t bs, const ggml_e5m2_t * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
34+
GGML_API void ggml_vec_dot_e4m3(int n, float * GGML_RESTRICT s, size_t bs, const ggml_e4m3_t * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
35+
GGML_API void ggml_vec_dot_e4m3_q(int n, float * GGML_RESTRICT s, size_t bs, const block_e4m3_q * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
36+
GGML_API void ggml_vec_dot_e3m4_q(int n, float * GGML_RESTRICT s, size_t bs, const block_e3m4_q * GGML_RESTRICT vx, size_t bx, const float * GGML_RESTRICT vy, size_t by, int nrc);
3937

4038
#ifdef __cplusplus
4139
}

0 commit comments

Comments
 (0)