
Commit 189f585

Merge branch 'layla-build' into merge
2 parents: 5ceed62 + 2338cf0

1,066 files changed: 208,285 additions, 184 deletions


.gitignore

Lines changed: 20 additions & 11 deletions
@@ -65,12 +65,11 @@
 !/.github/workflows/*.yml

 # Models
-
-/models/*
-/models-mnt
-!/models/.editorconfig
-!/models/ggml-vocab-*.gguf*
-!/models/templates
+models/*
+models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
+!models/templates

 # Zig
 /zig-out/
@@ -129,10 +128,20 @@ poetry.toml
 # Local scripts
 /run-vim.sh
 /run-chat.sh
-/.ccache/
+
+.ccache/

 # IDE
-/*.code-workspace
-/.windsurf/
-# emscripten
-a.out.*
+*.code-workspace
+.windsurf/
+
+HEXAGON_Tools/
+prebuilts/QNN_SDK/qairt/2.35.0.250530/
+prebuilts/QNN_SDK/qairt/2.36.0.250627/
+prebuilts/QNN_SDK/v2.35.0.250530.zip
+prebuilts/QNN_SDK/v2.36.0.250627.zip
+prebuilts/Hexagon_SDK/minimal-hexagon-sdk-6.2.0.1.xz
+prebuilts/OpenCL_SDK/
+prebuilts/Vulkan_SDK/
+
+pkg-adb/

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -152,6 +152,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
 llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
+llama_option_depr(WARNING LLAMA_HEXAGON GGML_HEXAGON)

 if (NOT MSVC)
     if (LLAMA_SANITIZE_THREAD)

CMakeUserPresets.json

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@ (new file)
{
    "version": 4,
    "configurePresets": [
        {
            "name": "arm64-android-snapdragon",
            "hidden": true,
            "architecture": { "value": "arm64", "strategy": "external" },
            "toolset": { "value": "host=x86_64", "strategy": "external" },
            "cacheVariables": {
                "ANDROID_ABI": "arm64-v8a",
                "ANDROID_PLATFORM": "android-31",
                "CMAKE_TOOLCHAIN_FILE": "$env{ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake",
                "CMAKE_C_FLAGS": "-march=armv8.6-a+fp16 -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
                "CMAKE_CXX_FLAGS": "-march=armv8.6-a+fp16 -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
                "CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
                "CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
                "CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
                "CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
                "HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
                "PREBUILT_LIB_DIR": "android_aarch64",
                "GGML_OPENMP": "OFF",
                "GGML_LLAMAFILE": "OFF",
                "GGML_OPENCL": "OFF",
                "GGML_HEXAGON": "ON",
                "LLAMA_CURL": "OFF",
                "GGML_BACKEND_DL": "ON"
            }
        },

        {
            "name": "arm64-windows-snapdragon",
            "inherits": [ "base", "arm64-windows-llvm" ],
            "cacheVariables": {
                "HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
                "PREBUILT_LIB_DIR": "windows_aarch64",
                "GGML_OPENMP": "OFF",
                "GGML_LLAMAFILE": "OFF",
                "GGML_OPENCL": "ON",
                "GGML_HEXAGON": "ON",
                "LLAMA_CURL": "OFF"
            }
        },

        { "name": "arm64-android-snapdragon-debug"  , "inherits": [ "base", "arm64-android-snapdragon", "debug" ] },
        { "name": "arm64-android-snapdragon-release", "inherits": [ "base", "arm64-android-snapdragon", "release" ] },

        { "name": "arm64-windows-snapdragon-debug"  , "inherits": [ "base", "arm64-windows-snapdragon", "debug" ] },
        { "name": "arm64-windows-snapdragon-release", "inherits": [ "base", "arm64-windows-snapdragon", "release" ] }
    ]
}
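With these presets checked in, a Snapdragon Android build would typically be configured with "cmake --preset arm64-android-snapdragon-release" (preset schema version 4 requires CMake 3.23 or newer), assuming ANDROID_NDK_ROOT and HEXAGON_SDK_ROOT are exported in the environment; the "base", "debug", "release", and "arm64-windows-llvm" presets referenced via "inherits" are expected to come from the repository's existing CMakePresets.json.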

common/CMakeLists.txt

Lines changed: 35 additions & 4 deletions
@@ -6,9 +6,8 @@ llama_add_compile_flags()

 # Build info header
 #
-
-if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
-    set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
+    set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git")

     # Is git submodule
     if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,7 +17,7 @@ if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
         if (SLASH_POS EQUAL 0)
             set(GIT_DIR "${REAL_GIT_DIR}")
         else()
-            set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
+            set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
         endif()
     endif()
@@ -32,6 +31,38 @@ else()
     message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
 endif()

+# Find Git executable
+find_package(Git)
+
+# Get git commit hash
+if(GIT_FOUND AND EXISTS "${GIT_DIR}")
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE LLAMA_BUILD_COMMIT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+    )
+
+    # Get build number from git commit count
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE LLAMA_BUILD_NUMBER
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+    )
+else()
+    set(LLAMA_BUILD_COMMIT "unknown")
+    set(LLAMA_BUILD_NUMBER 0)
+endif()
+
+# Set compiler info
+set(BUILD_COMPILER "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
+
+# Set build target
+set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
+
 set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
 set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
 configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
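The git results gathered above feed the configure_file() call at the end of the hunk, which expands common/build-info.cpp.in into the generated build-info.cpp. Roughly, and only as an illustration with invented values (the variable names follow common/build-info.h), the generated file ends up looking like this:

// Illustrative result of configure_file(); values are made up for the example.
int          LLAMA_BUILD_NUMBER = 6123;
const char * LLAMA_COMMIT       = "189f585";
const char * LLAMA_COMPILER     = "Clang 18.0.3";
const char * LLAMA_BUILD_TARGET = "Android aarch64";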

common/common.cpp

Lines changed: 3 additions & 0 deletions
@@ -1229,6 +1229,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     auto mparams = llama_model_default_params();

     if (!params.devices.empty()) {
+        // add nullptr to the end just in case
+        params.devices.push_back(nullptr);
+
         mparams.devices = params.devices.data();
     }
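The extra push_back guards llama_model_params.devices, which llama.cpp treats as a NULL-terminated array: the vector's data() pointer carries no length, so the trailing sentinel is what tells the consumer where the list ends. A minimal, self-contained sketch of that consuming pattern (fake_device and enumerate_devices are illustrative stand-ins, not llama.cpp internals):

#include <cstdio>

// Stand-in for a backend device handle; the real list holds ggml_backend_dev_t.
struct fake_device { const char * name; };

// Walks the array until the nullptr sentinel -- without it this loop would run off the end.
static void enumerate_devices(fake_device * const * devices) {
    for (size_t i = 0; devices[i] != nullptr; ++i) {
        std::printf("device %zu: %s\n", i, devices[i]->name);
    }
}

int main() {
    fake_device gpu = { "gpu0" };
    fake_device cpu = { "cpu" };
    fake_device * devices[] = { &gpu, &cpu, nullptr }; // sentinel-terminated
    enumerate_devices(devices);
}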

common/sampling.cpp

Lines changed: 32 additions & 0 deletions
@@ -63,6 +63,17 @@ struct ring_buffer {
         return value;
     }

+    T pop_back() {
+        if (sz == 0) {
+            throw std::runtime_error("ring buffer is empty");
+        }
+        // Move pos backwards, wrapping around if necessary
+        pos = (pos == 0) ? capacity - 1 : pos - 1;
+        T value = data[pos];
+        sz--;
+        return value;
+    }
+
     const T & rat(size_t i) const {
         if (i >= sz) {
             throw std::runtime_error("ring buffer: index out of bounds");
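pop_back() is the inverse of the existing push_back(): since pos always points at the next write slot, stepping it back one position (wrapping from 0 to capacity - 1) lands on the most recently written element. A standalone toy version of the same arithmetic, shown here only to illustrate the wrap-around (the real template struct lives in common/sampling.cpp):

#include <cassert>
#include <cstdio>
#include <vector>

// Toy ring buffer mirroring the push_back / pop_back index arithmetic.
struct mini_ring {
    std::vector<int> data;
    size_t capacity, sz = 0, first = 0, pos = 0;

    explicit mini_ring(size_t cap) : data(cap), capacity(cap) {}

    void push_back(int v) {
        if (sz == capacity) {
            first = (first + 1) % capacity;  // full: overwrite the oldest element
        } else {
            sz++;
        }
        data[pos] = v;
        pos = (pos + 1) % capacity;          // pos always points at the next write slot
    }

    int pop_back() {
        assert(sz > 0);
        pos = (pos == 0) ? capacity - 1 : pos - 1;  // step back, wrapping at index 0
        sz--;
        return data[pos];
    }
};

int main() {
    mini_ring r(3);
    r.push_back(1); r.push_back(2); r.push_back(3);
    r.push_back(4);            // overwrites 1; pos has wrapped around to slot 1
    int a = r.pop_back();      // 4 -- the most recent push
    int b = r.pop_back();      // 3
    std::printf("%d %d\n", a, b);
}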
@@ -327,6 +338,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
     gsmpl->reset();
 }

+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
+    llama_sampler_reset(gsmpl->grmr);
+
+    gsmpl->grmr = llama_sampler_init_grammar(llama_model_get_vocab(model), grammar, "root");
+}
+
 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
     return new common_sampler {
         /* .params = */ gsmpl->params,
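The new helper resets the sampler's current grammar constraint and installs a fresh one compiled from the model's vocabulary, so a caller can swap grammars mid-session without rebuilding the whole sampler chain. A hedged usage sketch (the GBNF string is just an example; smpl and model are assumed to come from common_sampler_init() and a loaded model):

#include "sampling.h"

// Constrain all subsequent sampling to plain digits by swapping in a new grammar.
static void constrain_to_digits(common_sampler * smpl, const llama_model * model) {
    common_sampler_reinit_grammar(smpl, model, "root ::= [0-9]+");
}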
@@ -542,6 +559,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
     return result;
 }

+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
+    return gsmpl->prev.to_vector();
+}
+
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
+    if(rollback_num > gsmpl->prev.size()) {
+        rollback_num = gsmpl->prev.size();
+    }
+
+    // continuously pop the last token
+    for(int i = 0; i < rollback_num; i++) {
+        gsmpl->prev.pop_back();
+    }
+}
+
 char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
         case COMMON_SAMPLER_TYPE_DRY: return 'd';
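common_sampler_prev() exposes the sampler's accepted-token history and common_sampler_rollback() trims the last N entries from it (clamped to the history size); the intended use is keeping repetition-penalty state consistent when the caller rewinds generation, for example after rejecting draft tokens. Only the sampler history is touched: rewinding the llama_context / KV cache remains the caller's job. A hedged sketch of the sampler-side half:

#include "sampling.h"

#include <vector>

// After deciding to discard the last n_reject sampled tokens, keep the sampler's
// penalty history in step with however the surrounding code rewinds the context.
static void rollback_sampler_history(common_sampler * smpl, int n_reject) {
    common_sampler_rollback(smpl, n_reject); // clamps n_reject to the history size

    // The tokens that survived the rollback:
    std::vector<llama_token> prev = common_sampler_prev(smpl);
    (void) prev; // e.g. log or re-detokenize them here
}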

common/sampling.h

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
 void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
 void common_sampler_reset (struct common_sampler * gsmpl);
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
 struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);

 // arguments can be nullptr to skip printing
@@ -98,6 +99,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);

 // get a string representation of the last accepted tokens
 std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);

 char common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

docs/backend/hexagon/CMakeUserPresets.json

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
             "PREBUILT_LIB_DIR": "android_aarch64",
             "GGML_OPENMP": "OFF",
             "GGML_LLAMAFILE": "OFF",
-            "GGML_OPENCL": "ON",
+            "GGML_OPENCL": "OFF",
             "GGML_HEXAGON": "ON",
             "LLAMA_CURL": "OFF"
         }

ggml/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
@@ -252,6 +252,7 @@ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels"
 option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
 set   (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
                                   "gmml: OpenCL API version to target")
+option(GGML_HEXAGON "ggml: use HEXAGON" OFF)

 option(GGML_HEXAGON "ggml: enable Hexagon backend" OFF)

@@ -319,9 +320,17 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-sycl.h
     include/ggml-vulkan.h
     include/ggml-webgpu.h
+    include/ggml-hexagon.h
     include/gguf.h)

 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
+
+# link android log library
+if(ANDROID)
+    find_library(log-lib log)
+    target_link_libraries(ggml PRIVATE ${log-lib})
+endif()
+
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
@@ -214,6 +214,7 @@ extern "C" {
     //
     // Backend registry
     //
+    GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon, bool useMetal);

     GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
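ggml_backend_reg_layla() has no upstream counterpart; it appears to be a Layla-specific hook for choosing which statically linked backends get registered at runtime rather than registering everything unconditionally. A hedged sketch of how an app might drive it (the flag sources are invented; only the declaration above comes from the commit):

#include "ggml-backend.h"

#include <cstdio>

// Register only the backends the user/device combination can actually use.
static void init_backends(bool user_wants_gpu, bool device_has_hexagon) {
    ggml_backend_reg_layla(
        /* useVulkan  */ user_wants_gpu,
        /* useOpenCL  */ user_wants_gpu,
        /* useHexagon */ device_has_hexagon,
        /* useMetal   */ false);            // not applicable on Android

    std::printf("%zu backend device(s) registered\n", ggml_backend_dev_count());
}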
