Commit 429d69f

Latest commits, logit bias strings from input text file
* you can now add a list of words between `[[` and `]]` at the end of the text in `_regens.txt` input files, still WIP
1 parent ee7b4d3 commit 429d69f

22 files changed: +3798 / -3694 lines changed

Makefile

Lines changed: 3 additions & 0 deletions
@@ -480,6 +480,9 @@ OBJS_GGUF_LLAMA = \
 	$(TMP)$(PREFIX)_llama-impl.o \
 	$(TMP)$(PREFIX)_llama-io.o \
 	$(TMP)$(PREFIX)_llama-kv-cache.o \
+	$(TMP)$(PREFIX)_llama-kv-cache-unified.o \
+	$(TMP)$(PREFIX)_llama-kv-cache-unified-iswa.o \
+	$(TMP)$(PREFIX)_llama-kv-cache-recurrent.o \
 	$(TMP)$(PREFIX)_llama-memory.o \
 	$(TMP)$(PREFIX)_llama-mmap.o \
 	$(TMP)$(PREFIX)_llama-model-loader.o \

base_sampling2/class_chat.cpp

Lines changed: 2 additions & 1 deletion
@@ -118,7 +118,7 @@ int main(int argc, char ** argv) {
 
     std::string suffix_addon = "";
 
-    if (filename.rfind(".json") != filename.npos){
+    if (filename.rfind(".json") != filename.npos) {
         SetConsoleTitle("Loading a json file...");
         auto instantJson = getJson(filename);
         if (instantJson.contains("presets")){
@@ -152,6 +152,7 @@ int main(int argc, char ** argv) {
            //threadedChat.externalData = "Cycles left: " + std::to_string(regens);
        }
        std::string extract = extract_string_mod(inputPrompt, "{{","}}");
+       extract_logit_bias_strings(inputPrompt, "[[","]]" , settings.modelConfig[settings.modelName]);
        if (extract != "NULL") suffix_addon = extract;
    }
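The new call is what wires the `[[ ... ]]` block from the commit message into the per-model config. The body of extract_logit_bias_strings lives in one of the other changed files and is not shown in this excerpt; below is only a minimal sketch of how such a helper could behave, with the model_config_stub type and its logit_bias_strings field assumed purely for illustration.

// Hypothetical sketch: parse a trailing "[[ word1 word2 ]]" block out of a prompt.
// Not the commit's implementation; names and the config shape are assumptions.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Assumed stand-in for the per-model config that receives the bias words.
struct model_config_stub {
    std::vector<std::string> logit_bias_strings;
};

static void extract_logit_bias_strings_sketch(std::string & prompt,
                                              const std::string & open,
                                              const std::string & close,
                                              model_config_stub & config) {
    size_t start = prompt.rfind(open);
    size_t end   = prompt.rfind(close);
    if (start == std::string::npos || end == std::string::npos || end <= start) {
        return; // no [[ ... ]] block at the end of the text
    }
    // split the enclosed text into whitespace-separated words
    std::istringstream words(prompt.substr(start + open.size(), end - start - open.size()));
    for (std::string word; words >> word; ) {
        config.logit_bias_strings.push_back(word);
    }
    // strip the [[ ... ]] block so it is not fed to the model as prompt text
    prompt.erase(start, end + close.size() - start);
}

int main() {
    std::string prompt = "Write a short story. [[ suddenly however moreover ]]";
    model_config_stub cfg;
    extract_logit_bias_strings_sketch(prompt, "[[", "]]", cfg);
    std::cout << "prompt: '" << prompt << "', biased words: " << cfg.logit_bias_strings.size() << "\n";
}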

base_sampling2/master/ggml/include/ggml.h

Lines changed: 0 additions & 3 deletions
@@ -2095,9 +2095,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

base_sampling2/master/ggml/src/ggml-blas/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -81,7 +81,7 @@ if (BLAS_FOUND)
         target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
         target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
     else()
-        message(ERROR "BLAS not found, please refer to "
-                      "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
-                      " to set correct GGML_BLAS_VENDOR")
+        message(FATAL_ERROR "BLAS not found, please refer to "
+                            "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+                            " to set correct GGML_BLAS_VENDOR")
     endif()

base_sampling2/master/ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 3 additions & 5 deletions
@@ -2425,15 +2425,14 @@ static bool ggml_thread_apply_priority(int32_t prio) {
         case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
     }
 
-#if defined(__GNUC__)
-    // MinGW doesn't support THREAD_POWER_THROTTLING_CURRENT_VERSION and THREAD_POWER_THROTTLING_EXECUTION_SPEED
-#else
     if (prio != GGML_SCHED_PRIO_LOW) {
         // Tell Windows that this thread should not be throttled (needs its own CPU core).
         // Newer Windows 11 versions aggresively park (offline) CPU cores and often place
         // all our threads onto the first 4 cores which results in terrible performance with
         // n_threads > 4
-#if _WIN32_WINNT >= 0x0602
+        // MinGW doesn't support THREAD_POWER_THROTTLING_CURRENT_VERSION
+        // and THREAD_POWER_THROTTLING_EXECUTION_SPEED
+#if !defined(__GNUC__) && _WIN32_WINNT >= 0x0602
         THREAD_POWER_THROTTLING_STATE t;
         ZeroMemory(&t, sizeof(t));
         t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
@@ -2446,7 +2445,6 @@ static bool ggml_thread_apply_priority(int32_t prio) {
         }
 #endif
     }
-#endif
 
     if (prio == GGML_SCHED_PRIO_NORMAL) {
         // Keep inherited policy/priority

base_sampling2/master/ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,8 @@
 extern "C" {
 #endif
 
+void ggml_print_backtrace(void);
+
 #ifndef MIN
 #    define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif

base_sampling2/master/ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 1 addition & 1 deletion
@@ -1652,7 +1652,7 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t D, uint32_
         return {64, 32};
     }
     return {64, 64};
-};
+}
 
 static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vector<uint32_t>& warptile, bool mul_mat_id, ggml_type src0_type) {
base_sampling2/master/ggml/src/ggml.c

Lines changed: 9 additions & 2 deletions
@@ -133,7 +133,7 @@ static void ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
     if (GGML_NO_BACKTRACE) {
         return;
@@ -160,13 +160,18 @@ static void ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
         snprintf(attach, sizeof(attach), "attach %d", parent_pid);
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +200,7 @@ static void ggml_print_backtrace(void) {
     }
 }
 #else
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +221,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+#include "ggml-impl.h"
+
+#include <cstdlib>
+#include <exception>
+
+static std::terminate_handler previous_terminate_handler;
+
+GGML_NORETURN static void ggml_uncaught_exception() {
+    ggml_print_backtrace();
+    if (previous_terminate_handler) {
+        previous_terminate_handler();
+    }
+    abort(); // unreachable unless previous_terminate_handler was nullptr
+}
+
+static bool ggml_uncaught_exception_init = []{
+    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
+    if (GGML_NO_BACKTRACE) {
+        return false;
+    }
+    const auto prev{std::get_terminate()};
+    GGML_ASSERT(prev != ggml_uncaught_exception);
+    previous_terminate_handler = prev;
+    std::set_terminate(ggml_uncaught_exception);
+    return true;
+}();
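Together with the ggml_print_backtrace declaration added to ggml-impl.h, this new translation unit installs the backtrace hook via a namespace-scope static initializer, so it is active before main() runs and chains to whatever terminate handler was previously installed. A minimal illustration of the effect, not part of the commit:

// With the handler above linked in (and GGML_NO_BACKTRACE unset), an exception
// that escapes main() reaches std::terminate, which now calls
// ggml_uncaught_exception and therefore ggml_print_backtrace() before aborting.
#include <stdexcept>

int main() {
    throw std::runtime_error("uncaught"); // would print a backtrace, then abort
}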

base_sampling2/master/ggml/src/gguf.cpp

Lines changed: 19 additions & 2 deletions
@@ -347,11 +347,28 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        if (ok && ctx->version == 1) {
             GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
-        if (ctx->version > GGUF_VERSION) {
+        if (ok && ctx->version > GGUF_VERSION) {
             GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, GGUF_VERSION);
             ok = false;
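The comment in the hunk above carries the whole heuristic: a legitimate GGUF version is a small integer, so when the file and host disagree on endianness the significant byte of the version word ends up in its upper half and the low 16 bits read as zero, which the new (version & 0x0000FFFF) == 0 check rejects. A small stand-alone illustration of that arithmetic, with assumed values rather than code from the commit:

// Illustration only: a plain 32-bit byte swap models reading the version field
// with the wrong endianness; the low 16 bits of the misread value are zero.
#include <cstdint>
#include <cstdio>

static uint32_t byteswap32(uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
    uint32_t native  = 3;                   // version as written by the model
    uint32_t foreign = byteswap32(native);  // version as misread across endianness
    std::printf("native : 0x%08X passes check: %d\n", (unsigned) native,  (native  & 0x0000FFFFu) != 0);
    std::printf("foreign: 0x%08X passes check: %d\n", (unsigned) foreign, (foreign & 0x0000FFFFu) != 0);
}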
