
Commit 7c054e2

Merge branch 'layla-build' into merge
2 parents 71e74a3 + 8e70dd9 commit 7c054e2


828 files changed: +169834 additions, -131 deletions


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,6 @@ autogen-*.md
 !.github/workflows/*.yml
 
 # Models
-
 models/*
 models-mnt
 !models/.editorconfig
@@ -146,3 +145,4 @@ poetry.toml
 # Local scripts
 /run-vim.sh
 /run-chat.sh
+HEXAGON_Tools/

CMakeLists.txt

Lines changed: 19 additions & 0 deletions
@@ -7,6 +7,24 @@ set(CMAKE_WARN_UNUSED_CLI YES)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
+if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    if(DEFINED HTP_ARCH_VERSION)
+        if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
+            # works fine on Snapdragon 8Gen3 & 8Elite with 1.5x - 3x performance gains with the default ggml backend
+            #set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -ffp-model=fast -fno-finite-math-only")
+
+            # this set of flags is more general (without the cortex cpu optimisation, which is only available on very modern archs)
+            set(OPT_FLAG " -O3 -ffp-model=fast -fno-finite-math-only")
+
+            message("OPT_FLAG:${OPT_FLAG}")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
+            set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
+            set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
+        endif()
+    endif()
+endif()
+
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
@@ -120,6 +138,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
 llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
+llama_option_depr(WARNING LLAMA_HEXAGON GGML_HEXAGON)
 
 if (NOT MSVC)
     if (LLAMA_SANITIZE_THREAD)

common/common.cpp

Lines changed: 3 additions & 0 deletions
@@ -1079,6 +1079,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     auto mparams = llama_model_default_params();
 
     if (!params.devices.empty()) {
+        // add nullptr to the end just in case
+        params.devices.push_back(nullptr);
+
         mparams.devices = params.devices.data();
     }

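A note on the common.cpp change above: pushing a trailing nullptr means mparams.devices can be consumed either with an explicit count or as a null-terminated array. The sketch below is a minimal, self-contained illustration of that pattern under assumed names (fake_device and print_devices are illustrative, not llama.cpp types), not the actual llama.cpp code path.

    #include <cstdio>
    #include <vector>

    // Illustrative stand-in for a backend device handle.
    struct fake_device { const char * name; };

    // A consumer that walks the array until it hits the nullptr terminator.
    static void print_devices(fake_device ** devices) {
        for (size_t i = 0; devices[i] != nullptr; i++) {
            printf("device %zu: %s\n", i, devices[i]->name);
        }
    }

    int main() {
        fake_device gpu = { "gpu0" };
        fake_device npu = { "npu0" };

        std::vector<fake_device *> devices = { &gpu, &npu };
        devices.push_back(nullptr);     // trailing terminator, mirroring the change above

        print_devices(devices.data());  // safe: the consumer stops at the nullptr
        return 0;
    }
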
common/sampling.cpp

Lines changed: 32 additions & 0 deletions
@@ -62,6 +62,17 @@ struct ring_buffer {
         return value;
     }
 
+    T pop_back() {
+        if (sz == 0) {
+            throw std::runtime_error("ring buffer is empty");
+        }
+        // Move pos backwards, wrapping around if necessary
+        pos = (pos == 0) ? capacity - 1 : pos - 1;
+        T value = data[pos];
+        sz--;
+        return value;
+    }
+
     const T & rat(size_t i) const {
         if (i >= sz) {
             throw std::runtime_error("ring buffer: index out of bounds");
@@ -313,6 +324,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
     llama_sampler_reset(gsmpl->chain);
 }
 
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
+    llama_sampler_reset(gsmpl->grmr);
+
+    gsmpl->grmr = llama_sampler_init_grammar(llama_model_get_vocab(model), grammar, "root");
+}
+
 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
     return new common_sampler {
         /* .params = */ gsmpl->params,
@@ -466,6 +483,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
     return result;
 }
 
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
+    return gsmpl->prev.to_vector();
+}
+
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
+    if(rollback_num > gsmpl->prev.size()) {
+        rollback_num = gsmpl->prev.size();
+    }
+
+    // continuously pop the last token
+    for(int i = 0; i < rollback_num; i++) {
+        gsmpl->prev.pop_back();
+    }
+}
+
 char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
         case COMMON_SAMPLER_TYPE_DRY: return 'd';
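
The pop_back added to ring_buffer undoes the most recent push_back by stepping pos back one slot, wrapping from 0 to capacity - 1; this is what makes common_sampler_rollback possible. The following toy ring buffer (toy_ring is a made-up name, not the llama.cpp type) is a self-contained sketch of the same wrap-around logic:

    #include <cstdio>
    #include <stdexcept>
    #include <vector>

    // Toy fixed-capacity ring buffer illustrating the pop_back wrap-around.
    struct toy_ring {
        std::vector<int> data;
        size_t capacity;
        size_t pos = 0;   // next write position
        size_t sz  = 0;   // number of valid elements

        explicit toy_ring(size_t cap) : data(cap), capacity(cap) {}

        void push_back(int v) {
            data[pos] = v;
            pos = (pos + 1) % capacity;   // advance, wrapping forward
            if (sz < capacity) sz++;
        }

        int pop_back() {
            if (sz == 0) {
                throw std::runtime_error("ring buffer is empty");
            }
            pos = (pos == 0) ? capacity - 1 : pos - 1;   // step back, wrapping around
            sz--;
            return data[pos];
        }
    };

    int main() {
        toy_ring r(3);
        r.push_back(1); r.push_back(2); r.push_back(3);
        r.push_back(4);          // overwrites 1; pos has wrapped past the start
        int a = r.pop_back();    // 4 (most recently pushed)
        int b = r.pop_back();    // 3
        printf("%d %d\n", a, b);
        return 0;
    }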

common/sampling.h

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
 void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
 void common_sampler_reset (struct common_sampler * gsmpl);
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
 struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 
 // arguments can be nullptr to skip printing
@@ -96,6 +97,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
 
 // get a string representation of the last accepted tokens
 std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
 
 char common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
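
Taken together, the new declarations let a caller read back recently accepted tokens, discard some of them, and re-create the grammar constraint mid-generation. Below is a hedged usage sketch, not code from this commit: it assumes a common_sampler obtained from common_sampler_init, a loaded llama_model, and an illustrative GBNF string; the include path may differ depending on how the headers are consumed.

    #include "sampling.h"   // common/sampling.h in the source tree

    // Sketch only: `model` and `gsmpl` are assumed to have been created elsewhere.
    void backtrack_and_switch_grammar(const llama_model * model, common_sampler * gsmpl) {
        // snapshot of the last accepted tokens
        const std::vector<llama_token> prev = common_sampler_prev(gsmpl);
        (void) prev;

        // drop up to 8 tokens from the sampler's history
        // (common_sampler_rollback clamps the count to what is available)
        common_sampler_rollback(gsmpl, 8);

        // replace the grammar sampler with a fresh one built from a new GBNF string
        common_sampler_reinit_grammar(gsmpl, model, "root ::= \"yes\" | \"no\"");
    }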

ggml/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
@@ -205,6 +205,7 @@ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels"
 option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
 set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
      "gmml: OpenCL API version to target")
+option(GGML_HEXAGON "ggml: use HEXAGON" OFF)
 
 # toolchain for vulkan-shaders-gen
 set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@@ -270,9 +271,17 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-rpc.h
     include/ggml-sycl.h
     include/ggml-vulkan.h
+    include/ggml-hexagon.h
     include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
+
+# link android log library
+if(ANDROID)
+    find_library(log-lib log)
+    target_link_libraries(ggml PRIVATE ${log-lib})
+endif()
+
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
@@ -202,6 +202,7 @@ extern "C" {
     //
     // Backend registry
     //
+    GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon);
 
     GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
 

ggml/include/ggml-hexagon.h

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2024-2025 The ggml authors
+ */
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GGML_HEXAGON_MAX_DEVICES  4
+#define GGML_HEXAGON_BACKEND_NAME "hexagon"
+
+enum HEXAGONBackend {
+    HEXAGON_BACKEND_QNNCPU = 0,
+    HEXAGON_BACKEND_QNNGPU = 1,
+    HEXAGON_BACKEND_QNNNPU = 2,
+    HEXAGON_BACKEND_CDSP   = 3,
+    HEXAGON_BACKEND_GGML   = 4, // "fake" HEXAGON backend, used to compare performance between the HEXAGON backend and the ggml backend
+};
+
+GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(size_t dev_num, const char * qnn_lib_path);
+
+GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);
+
+GGML_BACKEND_API int ggml_backend_hexagon_get_device_count(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
+
+const char * ggml_backend_hexagon_get_devname(size_t dev_num);
+
+#ifdef __cplusplus
+}
+#endif
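
The header above only declares the Hexagon entry points. Below is a hedged sketch of how a host application might probe them, assuming ggml was built with GGML_HEXAGON=ON; the runtime library path passed to ggml_backend_hexagon_init is an illustrative placeholder, not a documented location.

    #include <cstdio>
    #include "ggml-hexagon.h"

    int main() {
        const int n_dev = ggml_backend_hexagon_get_device_count();
        printf("hexagon devices: %d\n", n_dev);

        for (int i = 0; i < n_dev; i++) {
            printf("  device %d: %s\n", i, ggml_backend_hexagon_get_devname((size_t) i));
        }

        // try to bring up the cDSP device; the path is an assumption for this sketch
        ggml_backend_t backend = ggml_backend_hexagon_init(HEXAGON_BACKEND_CDSP, "/data/local/tmp/");
        if (backend != nullptr && ggml_backend_is_hexagon(backend)) {
            printf("hexagon backend initialized\n");
            ggml_backend_free(backend);   // declared in ggml-backend.h
        }
        return 0;
    }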

ggml/src/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
@@ -206,6 +206,14 @@ add_library(ggml-base
             ggml-quants.h
             gguf.cpp)
 
+# Search for the 'log' library on Android
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+    find_library(log-lib log)
+    set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${log-lib})
+
+    target_link_libraries(ggml-base PUBLIC ${GGML_EXTRA_LIBS})
+endif()
+
 target_include_directories(ggml-base PRIVATE .)
 if (GGML_BACKEND_DL)
     target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
@@ -321,6 +329,7 @@ ggml_add_backend(RPC)
 ggml_add_backend(SYCL)
 ggml_add_backend(Vulkan)
 ggml_add_backend(OpenCL)
+ggml_add_backend(HEXAGON)
 
 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)

ggml/src/ggml-backend-reg.cpp

Lines changed: 27 additions & 2 deletions
@@ -65,6 +65,10 @@
 #include "ggml-kompute.h"
 #endif
 
+#ifdef GGML_USE_HEXAGON
+#include "ggml-hexagon.h"
+#endif
+
 // disable C++17 deprecation warning for std::codecvt_utf8
 #if defined(__clang__)
 #    pragma clang diagnostic push
@@ -155,6 +159,10 @@ struct ggml_backend_reg_entry {
     dl_handle_ptr handle;
 };
 
+static bool laylaUseVulkan  = false;
+static bool laylaUseOpenCL  = false;
+static bool laylaUseHexagon = false;
+
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_entry> backends;
     std::vector<ggml_backend_dev_t> devices;
@@ -170,10 +178,14 @@ struct ggml_backend_registry {
         register_backend(ggml_backend_sycl_reg());
 #endif
 #ifdef GGML_USE_VULKAN
-        register_backend(ggml_backend_vk_reg());
+        if(laylaUseVulkan) {
+            register_backend(ggml_backend_vk_reg());
+        }
 #endif
 #ifdef GGML_USE_OPENCL
-        register_backend(ggml_backend_opencl_reg());
+        if(laylaUseOpenCL) {
+            register_backend(ggml_backend_opencl_reg());
+        }
 #endif
 #ifdef GGML_USE_CANN
         register_backend(ggml_backend_cann_reg());
@@ -187,6 +199,11 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_KOMPUTE
         register_backend(ggml_backend_kompute_reg());
 #endif
+#ifdef GGML_USE_HEXAGON
+        if(laylaUseHexagon) {
+            register_backend(ggml_backend_hexagon_reg());
+        }
+#endif
 #ifdef GGML_USE_CPU
         register_backend(ggml_backend_cpu_reg());
 #endif
@@ -296,8 +313,15 @@ struct ggml_backend_registry {
     }
 };
 
+void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon) {
+    laylaUseVulkan  = useVulkan;
+    laylaUseOpenCL  = useOpenCL;
+    laylaUseHexagon = useHexagon;
+}
+
 static ggml_backend_registry & get_reg() {
     static ggml_backend_registry reg;
+
     return reg;
 }
 
@@ -577,6 +601,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     ggml_backend_load_best("vulkan", silent, dir_path);
     ggml_backend_load_best("opencl", silent, dir_path);
     ggml_backend_load_best("musa", silent, dir_path);
+    ggml_backend_load_best("hexagon", silent, dir_path);
     ggml_backend_load_best("cpu", silent, dir_path);
     // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
     const char * backend_path = std::getenv("GGML_BACKEND_PATH");
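
Because the registry lives in a function-local static inside get_reg(), the layla flags only take effect if ggml_backend_reg_layla() runs before anything else touches the backend registry (device queries, model loading, etc.). Below is a hedged sketch of the intended call order, assuming a build with the Vulkan, OpenCL and Hexagon backends compiled in; this is not code from the commit.

    #include <cstdio>
    #include "ggml-backend.h"

    int main() {
        // Must run before the first registry access; once the static
        // ggml_backend_registry has been constructed, these flags are ignored.
        ggml_backend_reg_layla(/*useVulkan =*/ false,
                               /*useOpenCL =*/ false,
                               /*useHexagon=*/ true);

        // First registry access: only the backends enabled above (plus the
        // unconditional ones such as CPU) are registered.
        const size_t n_dev = ggml_backend_dev_count();
        for (size_t i = 0; i < n_dev; i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            printf("device %zu: %s\n", i, ggml_backend_dev_name(dev));
        }
        return 0;
    }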
