Skip to content

Commit a406da3

Browse files
authored
Merge branch 'layla-build' into merge
2 parents 4b2dae3 + 0d3df34 commit a406da3

File tree

1,074 files changed

+221426
-190
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,074 files changed

+221426
-190
lines changed

.gitignore

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ autogen-*.md
7777
!.github/workflows/*.yml
7878

7979
# Models
80-
8180
models/*
8281
models-mnt
8382
!models/.editorconfig
@@ -147,8 +146,18 @@ poetry.toml
147146
# Local scripts
148147
/run-vim.sh
149148
/run-chat.sh
149+
150150
.ccache/
151151

152152
# IDE
153153
*.code-workspace
154154
.windsurf/
155+
156+
HEXAGON_Tools/
157+
prebuilts/QNN_SDK/qairt/2.35.0.250530/
158+
prebuilts/QNN_SDK/qairt/2.36.0.250627/
159+
prebuilts/QNN_SDK/v2.35.0.250530.zip
160+
prebuilts/QNN_SDK/v2.36.0.250627.zip
161+
prebuilts/Hexagon_SDK/minimal-hexagon-sdk-6.2.0.1.xz
162+
prebuilts/OpenCL_SDK/
163+
prebuilts/Vulkan_SDK/

CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,24 @@ set(CMAKE_WARN_UNUSED_CLI YES)
77

88
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
99

10+
if(CMAKE_SYSTEM_NAME STREQUAL "Android")  # extra optimisation flags are only applied to Android builds
11+
if(DEFINED HTP_ARCH_VERSION AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")  # requires an HTP arch to be given; skipped for Debug builds
12+
if ("${HTP_ARCH_VERSION}" STREQUAL "v75" OR "${HTP_ARCH_VERSION}" STREQUAL "v79")  # quoted so an empty or variable-like value cannot break the comparison
13+
# Alternative flag set (disabled): works fine on Snapdragon 8 Gen 3 and 8 Elite, with 1.5x - 3x performance gains with the default ggml backend
14+
#set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -ffp-model=fast -fno-finite-math-only")
15+
16+
# This set of flags is more general: it omits the Cortex CPU optimisation, which is only available on very modern architectures
17+
set(OPT_FLAG " -O3 -flto -D_GNU_SOURCE -fvectorize -ffp-model=fast -fno-finite-math-only")
18+
19+
message("OPT_FLAG:${OPT_FLAG}")
20+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${DEBUG_FLAG} ${OPT_FLAG}")
21+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${DEBUG_FLAG} ${OPT_FLAG}")
22+
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${DEBUG_FLAG} ${OPT_FLAG}")
23+
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${DEBUG_FLAG} ${OPT_FLAG}")
24+
endif()
25+
endif()
26+
endif()
27+
1028
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
1129
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
1230
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
@@ -137,6 +155,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
137155
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
138156
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
139157
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
158+
llama_option_depr(WARNING LLAMA_HEXAGON GGML_HEXAGON)
140159

141160
if (NOT MSVC)
142161
if (LLAMA_SANITIZE_THREAD)

common/CMakeLists.txt

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ llama_add_compile_flags()
66

77
# Build info header
88
#
9-
10-
if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
11-
set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
9+
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
10+
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
1211

1312
# Is git submodule
1413
if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,7 +17,7 @@ if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
1817
if (SLASH_POS EQUAL 0)
1918
set(GIT_DIR "${REAL_GIT_DIR}")
2019
else()
21-
set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
20+
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
2221
endif()
2322
endif()
2423

@@ -32,6 +31,38 @@ else()
3231
message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
3332
endif()
3433

34+
# Locate the Git executable (not REQUIRED: the fallback branch below handles its absence)
35+
find_package(Git)
36+
37+
# Record the abbreviated hash of HEAD in LLAMA_BUILD_COMMIT
38+
if(GIT_FOUND AND EXISTS "${GIT_DIR}")
39+
execute_process(
40+
COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
41+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
42+
OUTPUT_VARIABLE LLAMA_BUILD_COMMIT
43+
OUTPUT_STRIP_TRAILING_WHITESPACE
44+
ERROR_QUIET
45+
)
46+
47+
# Record the number of commits reachable from HEAD in LLAMA_BUILD_NUMBER
48+
execute_process(
49+
COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
50+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
51+
OUTPUT_VARIABLE LLAMA_BUILD_NUMBER
52+
OUTPUT_STRIP_TRAILING_WHITESPACE
53+
ERROR_QUIET
54+
)
55+
else()
56+
set(LLAMA_BUILD_COMMIT "unknown")
57+
set(LLAMA_BUILD_NUMBER 0)
58+
endif()
59+
60+
# Compiler description: "<compiler id> <compiler version>"
61+
set(BUILD_COMPILER "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
62+
63+
# Build target description: "<system name> <system processor>"
64+
set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
65+
3566
set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
3667
set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
3768
configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})

common/common.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
11191119
auto mparams = llama_model_default_params();
11201120

11211121
if (!params.devices.empty()) {
1122+
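// append a nullptr sentinel in case the list is not already terminated: mparams.devices receives only the raw data() pointer, with no length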
1123+
params.devices.push_back(nullptr);
1124+
11221125
mparams.devices = params.devices.data();
11231126
}
11241127

common/sampling.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ struct ring_buffer {
6262
return value;
6363
}
6464

65+
T pop_back() { // removes the element just before `pos` and returns a copy of it
66+
if (sz == 0) {
67+
throw std::runtime_error("ring buffer is empty");
68+
}
69+
// Step pos back by one slot, wrapping from 0 to capacity - 1
70+
pos = (pos == 0) ? capacity - 1 : pos - 1;
71+
T value = data[pos]; // copy the element out; the slot itself is left untouched
72+
sz--;
73+
return value;
74+
}
75+
6576
const T & rat(size_t i) const {
6677
if (i >= sz) {
6778
throw std::runtime_error("ring buffer: index out of bounds");
@@ -313,6 +324,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
313324
llama_sampler_reset(gsmpl->chain);
314325
}
315326

327+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) { // replaces gsmpl->grmr with a new grammar sampler built from `grammar`
328+
llama_sampler_free(gsmpl->grmr); // free the old grammar sampler; resetting it and then overwriting the pointer leaked it
329+
330+
gsmpl->grmr = llama_sampler_init_grammar(llama_model_get_vocab(model), grammar, "root"); // "root" is passed as the grammar's root rule name
331+
}
332+
316333
struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
317334
return new common_sampler {
318335
/* .params = */ gsmpl->params,
@@ -488,6 +505,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
488505
return result;
489506
}
490507

508+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) { // returns the sampler's previous-token buffer by value
509+
return gsmpl->prev.to_vector(); // converts the gsmpl->prev ring buffer to a vector
510+
}
511+
512+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) { // drops the last `rollback_num` tokens from gsmpl->prev
513+
if(rollback_num > (int) gsmpl->prev.size()) { // compare as int: an unsigned comparison turned a negative count into "pop everything"
514+
rollback_num = (int) gsmpl->prev.size();
515+
}
516+
517+
// pop the most recent token rollback_num times (no-op when rollback_num <= 0)
518+
for(int i = 0; i < rollback_num; i++) {
519+
gsmpl->prev.pop_back();
520+
}
521+
}
522+
491523
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
492524
switch (cnstr) {
493525
case COMMON_SAMPLER_TYPE_DRY: return 'd';

common/sampling.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
4343
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
4444
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
4545
void common_sampler_reset (struct common_sampler * gsmpl);
46+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
4647
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
4748

4849
// arguments can be nullptr to skip printing
@@ -98,6 +99,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
9899

99100
// get a string representation of the last accepted tokens
100101
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
102+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
103+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
101104

102105
char common_sampler_type_to_chr(enum common_sampler_type cnstr);
103106
std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

ggml/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels"
250250
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
251251
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
252252
"ggml: OpenCL API version to target")
253+
option(GGML_HEXAGON "ggml: use HEXAGON" OFF)
253254

254255
# toolchain for vulkan-shaders-gen
255256
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@@ -315,9 +316,17 @@ set(GGML_PUBLIC_HEADERS
315316
include/ggml-sycl.h
316317
include/ggml-vulkan.h
317318
include/ggml-webgpu.h
319+
include/ggml-hexagon.h
318320
include/gguf.h)
319321

320322
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
323+
324+
# On Android, locate the library named "log" and link it privately into ggml
325+
if(ANDROID)
326+
find_library(log-lib log)
327+
target_link_libraries(ggml PRIVATE ${log-lib})
328+
endif()
329+
321330
#if (GGML_METAL)
322331
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
323332
#endif()

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ extern "C" {
214214
//
215215
// Backend registry
216216
//
217+
GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon, bool useMetal);
217218

218219
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
219220

ggml/include/ggml-hexagon.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright (c) 2024-2025 The ggml authors
3+
*/
4+
#pragma once
5+
6+
#include "ggml.h"
7+
#include "ggml-backend.h"
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
#define GGML_HEXAGON_MAX_DEVICES 4
14+
#define GGML_HEXAGON_BACKEND_NAME "hexagon"
15+
16+
enum HEXAGONBackend {
17+
HEXAGON_BACKEND_QNNCPU = 0,
18+
HEXAGON_BACKEND_QNNGPU = 1,
19+
HEXAGON_BACKEND_QNNNPU = 2,
20+
HEXAGON_BACKEND_CDSP = 3,
21+
HEXAGON_BACKEND_GGML = 4, // "fake" HEXAGON backend, used to compare performance between the HEXAGON backend and the ggml backend
22+
};
23+
24+
// 0: general approach through QNN: offload ggml ops to QNN (QNNCPU, QNNGPU, QNNNPU)
25+
// 1: special approach through QNN-SINGLEGRAPH: map the entire ggml cgraph to a single QNN graph
26+
// 2: general approach through Hexagon cDSP: offload ggml ops to the Hexagon cDSP directly
27+
enum hwaccel_approach_type {
28+
HWACCEL_QNN = 0,
29+
HWACCEL_QNN_SINGLEGRAPH= 1,
30+
HWACCEL_CDSP = 2,
31+
};
32+
33+
GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(size_t dev_num, const char * qnn_lib_path);
34+
35+
GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);
36+
37+
GGML_BACKEND_API int ggml_backend_hexagon_get_device_count(void);
38+
39+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
40+
41+
GGML_BACKEND_API const char * ggml_backend_hexagon_get_devname(size_t dev_num);
42+
43+
GGML_BACKEND_API void ggml_backend_hexagon_set_cfg(int new_hexagon_backend, int new_hwaccel_approach);
44+
45+
GGML_BACKEND_API int ggml_backend_hexagon_get_mulmat_algotype(void);
46+
47+
GGML_BACKEND_API void ggml_backend_hexagon_set_mulmat_algotype(int new_mulmat_algotype);
48+
49+
#ifdef __cplusplus
50+
}
51+
#endif

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,7 @@ extern "C" {
688688

689689
// accepts a UTF-8 path, even on Windows
690690
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
691+
GGML_API FILE * ggml_fdopen(int fd, const char * mode, size_t fd_offset);
691692

692693
GGML_API void ggml_print_object (const struct ggml_object * obj);
693694
GGML_API void ggml_print_objects(const struct ggml_context * ctx);

0 commit comments

Comments
 (0)