
Commit 00facc5

ggerganov and JohannesGaessler authored and committed
cmake : add sanitizer flags for llama.cpp (ggml-org#11279)
* cmake : add sanitizer flags for llama.cpp (ggml-ci)
* tests : fix compile warnings (ggml-ci)
* cmake : move sanitizer flags to llama_add_compile_flags (ggml-ci)
* cmake : move llama.cpp compile flags to top level lists (ggml-ci)
* cmake : apply only sanitizer flags at top level (ggml-ci)
* tests : fix gguf context use in same_tensor_data
* gguf-test: tensor data comparison
* dummy : trigger ggml-ci
* unicode : silence gcc warnings (ggml-ci)
* ci : use sanitizer builds only in Debug mode (ggml-ci)
* cmake : add status messages [no ci]

---------

Co-authored-by: Johannes Gäßler <[email protected]>
1 parent f8352e9 commit 00facc5
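With this change, sanitizer instrumentation for the whole build is controlled by the top-level LLAMA_SANITIZE_THREAD, LLAMA_SANITIZE_ADDRESS and LLAMA_SANITIZE_UNDEFINED options (non-MSVC toolchains only). As a usage sketch: an AddressSanitizer build can be configured with `cmake -B build -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_ADDRESS=ON`, then built with `cmake --build build` and exercised with `ctest --test-dir build` (the `--test-dir` flag needs CMake 3.20+).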

File tree

7 files changed, +74 -41 lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
@@ -234,7 +234,7 @@ jobs:
     strategy:
       matrix:
         sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [Debug, Release]
+        build_type: [Debug]
 
     steps:
       - name: Clone

CMakeLists.txt

Lines changed: 49 additions & 24 deletions
@@ -83,11 +83,8 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
 
 # override ggml options
-set(GGML_SANITIZE_THREAD    ${LLAMA_SANITIZE_THREAD})
-set(GGML_SANITIZE_ADDRESS   ${LLAMA_SANITIZE_ADDRESS})
-set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
-set(GGML_ALL_WARNINGS       ${LLAMA_ALL_WARNINGS})
-set(GGML_FATAL_WARNINGS     ${LLAMA_FATAL_WARNINGS})
+set(GGML_ALL_WARNINGS   ${LLAMA_ALL_WARNINGS})
+set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
 
 # change the default for these ggml options
 if (NOT DEFINED GGML_LLAMAFILE)
@@ -117,16 +114,62 @@ llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
 llama_option_depr(WARNING LLAMA_CANN     GGML_CANN)
 
+if (NOT MSVC)
+    if (LLAMA_SANITIZE_THREAD)
+        message(STATUS "Using -fsanitize=thread")
+
+        add_compile_options(-fsanitize=thread)
+        link_libraries     (-fsanitize=thread)
+    endif()
+
+    if (LLAMA_SANITIZE_ADDRESS)
+        message(STATUS "Using -fsanitize=address")
+
+        add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
+        link_libraries     (-fsanitize=address)
+    endif()
+
+    if (LLAMA_SANITIZE_UNDEFINED)
+        message(STATUS "Using -fsanitize=undefined")
+
+        add_compile_options(-fsanitize=undefined)
+        link_libraries     (-fsanitize=undefined)
+    endif()
+endif()
+
 #
-# build the library
+# 3rd-party
 #
 
 if (NOT TARGET ggml)
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
+
+#
+# build the library
+#
+
 add_subdirectory(src)
 
+#
+# utils, programs, examples and tests
+#
+
+if (LLAMA_BUILD_COMMON)
+    add_subdirectory(common)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    include(CTest)
+    add_subdirectory(tests)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+    add_subdirectory(pocs)
+endif()
+
 #
 # install
 #
@@ -200,21 +243,3 @@ configure_file(cmake/llama.pc.in
 
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
         DESTINATION lib/pkgconfig)
-
-#
-# utils, programs, examples and tests
-#
-
-if (LLAMA_BUILD_COMMON)
-    add_subdirectory(common)
-endif()
-
-if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
-    include(CTest)
-    add_subdirectory(tests)
-endif()
-
-if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
-    add_subdirectory(examples)
-    add_subdirectory(pocs)
-endif()
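Note: add_compile_options() and link_libraries() are directory-scoped commands, so calling them at the top level before any add_subdirectory() applies the sanitizer flags to every target defined further down (ggml, src, common, tests, examples, pocs) without touching individual target definitions.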

ggml/src/gguf.cpp

Lines changed: 4 additions & 0 deletions
@@ -648,6 +648,10 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
 
     ok = ok && data != nullptr;
 
+    if (ok) {
+        ggml_set_name(data, "GGUF tensor data binary blob");
+    }
+
     // read the binary blob with the tensor data
     ok = ok && gr.read(data->data, ctx->size);
 
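Naming the blob gives downstream code a stable way to tell the raw-data tensor apart from the real tensors living in the same ggml_context. A minimal sketch of how a consumer might use this, assuming the blob is the leading tensor of the context (which the updated test below asserts); the helper name is hypothetical:

    #include <cstring>

    #include "ggml.h"

    // Return the first "real" tensor of a context produced by GGUF loading,
    // skipping the leading tensor that only carries the raw data blob.
    static struct ggml_tensor * first_real_tensor(struct ggml_context * ctx) {
        struct ggml_tensor * t = ggml_get_first_tensor(ctx);
        if (t != nullptr && std::strcmp(t->name, "GGUF tensor data binary blob") == 0) {
            t = ggml_get_next_tensor(ctx, t); // skip the blob
        }
        return t;
    }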

src/unicode.cpp

Lines changed: 2 additions & 3 deletions
@@ -7,18 +7,17 @@
 
 #include <algorithm>
 #include <cassert>
+#include <codecvt>
 #include <cstddef>
 #include <cstdint>
+#include <locale>
 #include <map>
 #include <regex>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
-#include <unordered_set>
 #include <utility>
 #include <vector>
-#include <locale>
-#include <codecvt>
 
 size_t unicode_len_utf8(char src) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };

tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+llama_add_compile_flags()
+
 function(llama_test target)
     include(CMakeParseArguments)
     set(options)
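The new llama_add_compile_flags() call applies the project-wide compile flags to everything defined in the tests directory; the helper is presumably the one provided via cmake/common.cmake, which the top-level CMakeLists.txt includes. Applying it here is what surfaced the test warnings fixed elsewhere in this commit.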

tests/test-gguf.cpp

Lines changed: 16 additions & 12 deletions
@@ -48,7 +48,7 @@ enum handcrafted_file_type {
     HANDCRAFTED_DATA_CUSTOM_ALIGN     = 810 + offset_has_data,
 };
 
-std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) {
+static std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) {
     switch (hft) {
         case HANDCRAFTED_HEADER_BAD_MAGIC:      return "HEADER_BAD_MAGIC";
         case HANDCRAFTED_HEADER_BAD_VERSION_1:  return "HEADER_BAD_VERSION_1";
@@ -99,7 +99,7 @@ static bool expect_context_not_null(const enum handcrafted_file_type hft) {
 
 typedef std::pair<enum ggml_type, std::array<int64_t, GGML_MAX_DIMS>> tensor_config_t;
 
-std::vector<tensor_config_t> get_tensor_configs(std::mt19937 & rng) {
+static std::vector<tensor_config_t> get_tensor_configs(std::mt19937 & rng) {
     std::vector<tensor_config_t> tensor_configs;
     tensor_configs.reserve(100);
 
@@ -122,7 +122,7 @@ std::vector<tensor_config_t> get_tensor_configs(std::mt19937 & rng) {
     return tensor_configs;
 }
 
-std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::mt19937 rng) {
+static std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::mt19937 rng) {
     std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types;
     kv_types.reserve(100);
 
@@ -626,8 +626,6 @@ static bool handcrafted_check_tensor_data(const gguf_context * gguf_ctx, const u
 
     bool ok = true;
 
-    const uint32_t alignment = GGUF_DEFAULT_ALIGNMENT;
-
     for (int i = 0; i < int(tensor_configs.size()); ++i) {
         const ggml_type type = tensor_configs[i].first;
         const std::array<int64_t, GGML_MAX_DIMS> shape = tensor_configs[i].second;
@@ -866,13 +864,13 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t
                 case GGUF_TYPE_COUNT:
                 default: {
                     GGML_ABORT("fatal error");
-                } break;
+                }
             }
         } break;
         case GGUF_TYPE_COUNT:
         default: {
             GGML_ABORT("fatal error");
-        } break;
+        }
     }
 }
 
@@ -938,7 +936,7 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
         }
 
         if (type == GGUF_TYPE_ARRAY) {
-            const int arr_n = gguf_get_arr_n(ctx, id);
+            const size_t arr_n = gguf_get_arr_n(ctx, id);
             if (arr_n != gguf_get_arr_n(other, idx_other)) {
                 ok = false;
                 continue;
@@ -953,7 +951,7 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
             if (type_arr == GGUF_TYPE_BOOL) {
                 const int8_t * data       = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx,   id));
                 const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(other, idx_other));
-                for (int arr_i = 0; arr_i < arr_n; ++arr_i) {
+                for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) {
                     if (bool(data[arr_i]) != bool(data_other[arr_i])) {
                         ok = false;
                     }
@@ -962,7 +960,7 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
             }
 
             if (type_arr == GGUF_TYPE_STRING) {
-                for (int arr_i = 0; arr_i < arr_n; ++arr_i) {
+                for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) {
                     const std::string str       = gguf_get_arr_str(ctx,   id,        arr_i);
                     const std::string str_other = gguf_get_arr_str(other, idx_other, arr_i);
                     if (str != str_other) {
@@ -1033,6 +1031,12 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml
 
     struct ggml_tensor * t_orig = ggml_get_first_tensor(orig);
     struct ggml_tensor * t_read = ggml_get_first_tensor(read);
+
+    if (std::string(t_read->name) != "GGUF tensor data binary blob") {
+        return false;
+    }
+    t_read = ggml_get_next_tensor(read, t_read);
+
     while (t_orig) {
         if (!t_read) {
             ok = false;
@@ -1051,13 +1055,13 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml
         }
 
         t_orig = ggml_get_next_tensor(orig, t_orig);
-        t_read = ggml_get_next_tensor(orig, t_read);
+        t_read = ggml_get_next_tensor(read, t_read);
     }
     if (t_read) {
        ok = false;
    }
 
-    return true;
+    return ok;
 }
 
 static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) {
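The same_tensor_data() changes fix two genuine bugs: t_read was advanced with the wrong context (orig instead of read), and the function returned true unconditionally instead of the accumulated ok flag, masking any mismatches. A minimal sketch of the corrected lockstep-traversal pattern (illustrative only, not the test's exact code):

    #include "ggml.h"

    // Walk two contexts in lockstep: each cursor must be advanced with its
    // own context, and the result must reflect the comparisons performed.
    static bool walk_in_lockstep(struct ggml_context * orig, struct ggml_context * read) {
        bool ok = true;
        struct ggml_tensor * t_orig = ggml_get_first_tensor(orig);
        struct ggml_tensor * t_read = ggml_get_first_tensor(read);
        while (t_orig != nullptr && t_read != nullptr) {
            // ... compare t_orig against t_read, set ok = false on mismatch ...
            t_orig = ggml_get_next_tensor(orig, t_orig); // advance within orig
            t_read = ggml_get_next_tensor(read, t_read); // advance within read
        }
        // both lists must end together, and all comparisons must have passed
        return ok && t_orig == nullptr && t_read == nullptr;
    }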

tests/test-sampling.cpp

Lines changed: 0 additions & 1 deletion
@@ -144,7 +144,6 @@ static void test_penalties(
 
     sampler_tester tester(probs, probs_expected);
 
-    const size_t n_vocab = probs.size();
     auto * sampler = llama_sampler_init_penalties(last_tokens.size(), repeat_penalty, alpha_frequency, alpha_presence);
 
     for (size_t i = 0; i < last_tokens.size(); i++) {
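Context for this removal: once the common compile flags apply to the tests, an unused local like n_vocab presumably trips -Wunused-variable (fatal when warnings are treated as errors), so it is dropped rather than kept.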
