Commit 429d69f

Latest commits, logit bias strings from input text file
* you can now add a list of words between `[[` and `]]` at the end of the text in `_regens.txt` input files, still WIP
1 parent ee7b4d3 commit 429d69f

22 files changed: +3798 / -3694 lines changed

Makefile

Lines changed: 3 additions & 0 deletions
@@ -480,6 +480,9 @@ OBJS_GGUF_LLAMA = \
 	$(TMP)$(PREFIX)_llama-impl.o \
 	$(TMP)$(PREFIX)_llama-io.o \
 	$(TMP)$(PREFIX)_llama-kv-cache.o \
+	$(TMP)$(PREFIX)_llama-kv-cache-unified.o \
+	$(TMP)$(PREFIX)_llama-kv-cache-unified-iswa.o \
+	$(TMP)$(PREFIX)_llama-kv-cache-recurrent.o \
 	$(TMP)$(PREFIX)_llama-memory.o \
 	$(TMP)$(PREFIX)_llama-mmap.o \
 	$(TMP)$(PREFIX)_llama-model-loader.o \

base_sampling2/class_chat.cpp

Lines changed: 2 additions & 1 deletion
@@ -118,7 +118,7 @@ int main(int argc, char ** argv) {
 
     std::string suffix_addon = "";
 
-    if (filename.rfind(".json") != filename.npos){
+    if (filename.rfind(".json") != filename.npos) {
         SetConsoleTitle("Loading a json file...");
         auto instantJson = getJson(filename);
         if (instantJson.contains("presets")){
@@ -152,6 +152,7 @@ int main(int argc, char ** argv) {
            //threadedChat.externalData = "Cycles left: " + std::to_string(regens);
        }
        std::string extract = extract_string_mod(inputPrompt, "{{","}}");
+       extract_logit_bias_strings(inputPrompt, "[[","]]" , settings.modelConfig[settings.modelName]);
        if (extract != "NULL") suffix_addon = extract;
    }
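The new call is what wires the `[[ ... ]]` block from the commit message into the per-model config. The body of extract_logit_bias_strings lives in one of the other changed files and is not shown in this excerpt; below is only a minimal sketch of how such a helper could behave, with the model_config_stub type and its logit_bias_strings field assumed purely for illustration.

// Hypothetical sketch: parse a trailing "[[ word1 word2 ]]" block out of a prompt.
// Not the commit's implementation; names and the config shape are assumptions.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Assumed stand-in for the per-model config that receives the bias words.
struct model_config_stub {
    std::vector<std::string> logit_bias_strings;
};

static void extract_logit_bias_strings_sketch(std::string & prompt,
                                              const std::string & open,
                                              const std::string & close,
                                              model_config_stub & config) {
    size_t start = prompt.rfind(open);
    size_t end   = prompt.rfind(close);
    if (start == std::string::npos || end == std::string::npos || end <= start) {
        return; // no [[ ... ]] block at the end of the text
    }
    // split the enclosed text into whitespace-separated words
    std::istringstream words(prompt.substr(start + open.size(), end - start - open.size()));
    for (std::string word; words >> word; ) {
        config.logit_bias_strings.push_back(word);
    }
    // strip the [[ ... ]] block so it is not fed to the model as prompt text
    prompt.erase(start, end + close.size() - start);
}

int main() {
    std::string prompt = "Write a short story. [[ suddenly however moreover ]]";
    model_config_stub cfg;
    extract_logit_bias_strings_sketch(prompt, "[[", "]]", cfg);
    std::cout << "prompt: '" << prompt << "', biased words: " << cfg.logit_bias_strings.size() << "\n";
}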

base_sampling2/master/ggml/include/ggml.h

Lines changed: 0 additions & 3 deletions
@@ -2095,9 +2095,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

base_sampling2/master/ggml/src/ggml-blas/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -81,7 +81,7 @@ if (BLAS_FOUND)
         target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
         target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
     else()
-        message(ERROR "BLAS not found, please refer to "
-                      "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
-                      " to set correct GGML_BLAS_VENDOR")
+        message(FATAL_ERROR "BLAS not found, please refer to "
+                            "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+                            " to set correct GGML_BLAS_VENDOR")
     endif()

base_sampling2/master/ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 3 additions & 5 deletions
@@ -2425,15 +2425,14 @@ static bool ggml_thread_apply_priority(int32_t prio) {
         case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
     }
 
-#if defined(__GNUC__)
-    // MinGW doesn't support THREAD_POWER_THROTTLING_CURRENT_VERSION and THREAD_POWER_THROTTLING_EXECUTION_SPEED
-#else
     if (prio != GGML_SCHED_PRIO_LOW) {
         // Tell Windows that this thread should not be throttled (needs its own CPU core).
         // Newer Windows 11 versions aggresively park (offline) CPU cores and often place
         // all our threads onto the first 4 cores which results in terrible performance with
         // n_threads > 4
-#if _WIN32_WINNT >= 0x0602
+        // MinGW doesn't support THREAD_POWER_THROTTLING_CURRENT_VERSION
+        // and THREAD_POWER_THROTTLING_EXECUTION_SPEED
+#if !defined(__GNUC__) && _WIN32_WINNT >= 0x0602
         THREAD_POWER_THROTTLING_STATE t;
         ZeroMemory(&t, sizeof(t));
         t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
@@ -2446,7 +2445,6 @@ static bool ggml_thread_apply_priority(int32_t prio) {
         }
 #endif
     }
-#endif
 
     if (prio == GGML_SCHED_PRIO_NORMAL) {
         // Keep inherited policy/priority

base_sampling2/master/ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,8 @@
 extern "C" {
 #endif
 
+void ggml_print_backtrace(void);
+
 #ifndef MIN
 #    define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif

base_sampling2/master/ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 1 addition & 1 deletion
@@ -1652,7 +1652,7 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t D, uint32_
         return {64, 32};
     }
     return {64, 64};
-};
+}
 
 static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vector<uint32_t>& warptile, bool mul_mat_id, ggml_type src0_type) {
base_sampling2/master/ggml/src/ggml.c

Lines changed: 9 additions & 2 deletions
@@ -133,7 +133,7 @@ static void ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
     if (GGML_NO_BACKTRACE) {
         return;
@@ -160,13 +160,18 @@ static void ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
         snprintf(attach, sizeof(attach), "attach %d", parent_pid);
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +200,7 @@ static void ggml_print_backtrace(void) {
     }
 }
 #else
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +221,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+#include "ggml-impl.h"
+
+#include <cstdlib>
+#include <exception>
+
+static std::terminate_handler previous_terminate_handler;
+
+GGML_NORETURN static void ggml_uncaught_exception() {
+    ggml_print_backtrace();
+    if (previous_terminate_handler) {
+        previous_terminate_handler();
+    }
+    abort(); // unreachable unless previous_terminate_handler was nullptr
+}
+
+static bool ggml_uncaught_exception_init = []{
+    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
+    if (GGML_NO_BACKTRACE) {
+        return false;
+    }
+    const auto prev{std::get_terminate()};
+    GGML_ASSERT(prev != ggml_uncaught_exception);
+    previous_terminate_handler = prev;
+    std::set_terminate(ggml_uncaught_exception);
+    return true;
+}();
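Together with the ggml_print_backtrace declaration added to ggml-impl.h, this new translation unit installs the backtrace hook via a namespace-scope static initializer, so it is active before main() runs and chains to whatever terminate handler was previously installed. A minimal illustration of the effect, not part of the commit:

// With the handler above linked in (and GGML_NO_BACKTRACE unset), an exception
// that escapes main() reaches std::terminate, which now calls
// ggml_uncaught_exception and therefore ggml_print_backtrace() before aborting.
#include <stdexcept>

int main() {
    throw std::runtime_error("uncaught"); // would print a backtrace, then abort
}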

base_sampling2/master/ggml/src/gguf.cpp

Lines changed: 19 additions & 2 deletions
@@ -347,11 +347,28 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        if (ok && ctx->version == 1) {
             GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
-        if (ctx->version > GGUF_VERSION) {
+        if (ok && ctx->version > GGUF_VERSION) {
             GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, GGUF_VERSION);
             ok = false;
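The comment in the hunk above carries the whole heuristic: a legitimate GGUF version is a small integer, so when the file and host disagree on endianness the significant byte of the version word ends up in its upper half and the low 16 bits read as zero, which the new (version & 0x0000FFFF) == 0 check rejects. A small stand-alone illustration of that arithmetic, with assumed values rather than code from the commit:

// Illustration only: a plain 32-bit byte swap models reading the version field
// with the wrong endianness; the low 16 bits of the misread value are zero.
#include <cstdint>
#include <cstdio>

static uint32_t byteswap32(uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
    uint32_t native  = 3;                   // version as written by the model
    uint32_t foreign = byteswap32(native);  // version as misread across endianness
    std::printf("native : 0x%08X passes check: %d\n", (unsigned) native,  (native  & 0x0000FFFFu) != 0);
    std::printf("foreign: 0x%08X passes check: %d\n", (unsigned) foreign, (foreign & 0x0000FFFFu) != 0);
}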
