
Commit 189f585

Merge branch 'layla-build' into merge
2 parents: 5ceed62 + 2338cf0

1,066 files changed: 208,285 additions, 184 deletions


.gitignore

Lines changed: 20 additions & 11 deletions
@@ -65,12 +65,11 @@
 !/.github/workflows/*.yml

 # Models
-
-/models/*
-/models-mnt
-!/models/.editorconfig
-!/models/ggml-vocab-*.gguf*
-!/models/templates
+models/*
+models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
+!models/templates

 # Zig
 /zig-out/
@@ -129,10 +128,20 @@ poetry.toml
 # Local scripts
 /run-vim.sh
 /run-chat.sh
-/.ccache/
+
+.ccache/

 # IDE
-/*.code-workspace
-/.windsurf/
-# emscripten
-a.out.*
+*.code-workspace
+.windsurf/
+
+HEXAGON_Tools/
+prebuilts/QNN_SDK/qairt/2.35.0.250530/
+prebuilts/QNN_SDK/qairt/2.36.0.250627/
+prebuilts/QNN_SDK/v2.35.0.250530.zip
+prebuilts/QNN_SDK/v2.36.0.250627.zip
+prebuilts/Hexagon_SDK/minimal-hexagon-sdk-6.2.0.1.xz
+prebuilts/OpenCL_SDK/
+prebuilts/Vulkan_SDK/
+
+pkg-adb/

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -152,6 +152,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
 llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
+llama_option_depr(WARNING LLAMA_HEXAGON GGML_HEXAGON)

 if (NOT MSVC)
     if (LLAMA_SANITIZE_THREAD)

CMakeUserPresets.json

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@ (new file)
{
    "version": 4,
    "configurePresets": [
        {
            "name": "arm64-android-snapdragon",
            "hidden": true,
            "architecture": { "value": "arm64", "strategy": "external" },
            "toolset": { "value": "host=x86_64", "strategy": "external" },
            "cacheVariables": {
                "ANDROID_ABI": "arm64-v8a",
                "ANDROID_PLATFORM": "android-31",
                "CMAKE_TOOLCHAIN_FILE": "$env{ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake",
                "CMAKE_C_FLAGS": "-march=armv8.6-a+fp16 -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
                "CMAKE_CXX_FLAGS": "-march=armv8.6-a+fp16 -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
                "CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
                "CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
                "CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
                "CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
                "HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
                "PREBUILT_LIB_DIR": "android_aarch64",
                "GGML_OPENMP": "OFF",
                "GGML_LLAMAFILE": "OFF",
                "GGML_OPENCL": "OFF",
                "GGML_HEXAGON": "ON",
                "LLAMA_CURL": "OFF",
                "GGML_BACKEND_DL": "ON"
            }
        },

        {
            "name": "arm64-windows-snapdragon",
            "inherits": [ "base", "arm64-windows-llvm" ],
            "cacheVariables": {
                "HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
                "PREBUILT_LIB_DIR": "windows_aarch64",
                "GGML_OPENMP": "OFF",
                "GGML_LLAMAFILE": "OFF",
                "GGML_OPENCL": "ON",
                "GGML_HEXAGON": "ON",
                "LLAMA_CURL": "OFF"
            }
        },

        { "name": "arm64-android-snapdragon-debug"  , "inherits": [ "base", "arm64-android-snapdragon", "debug" ] },
        { "name": "arm64-android-snapdragon-release", "inherits": [ "base", "arm64-android-snapdragon", "release" ] },

        { "name": "arm64-windows-snapdragon-debug"  , "inherits": [ "base", "arm64-windows-snapdragon", "debug" ] },
        { "name": "arm64-windows-snapdragon-release", "inherits": [ "base", "arm64-windows-snapdragon", "release" ] }
    ]
}
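With these presets checked in, a Snapdragon Android build would typically be configured with "cmake --preset arm64-android-snapdragon-release" (preset schema version 4 requires CMake 3.23 or newer), assuming ANDROID_NDK_ROOT and HEXAGON_SDK_ROOT are exported in the environment; the "base", "debug", "release", and "arm64-windows-llvm" presets referenced via "inherits" are expected to come from the repository's existing CMakePresets.json.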

common/CMakeLists.txt

Lines changed: 35 additions & 4 deletions
@@ -6,9 +6,8 @@ llama_add_compile_flags()

 # Build info header
 #
-
-if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
-    set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
+    set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git")

     # Is git submodule
     if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,7 +17,7 @@ if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
         if (SLASH_POS EQUAL 0)
             set(GIT_DIR "${REAL_GIT_DIR}")
         else()
-            set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
+            set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
         endif()
     endif()
@@ -32,6 +31,38 @@ else()
     message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
 endif()

+# Find Git executable
+find_package(Git)
+
+# Get git commit hash
+if(GIT_FOUND AND EXISTS "${GIT_DIR}")
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE LLAMA_BUILD_COMMIT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+    )
+
+    # Get build number from git commit count
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE LLAMA_BUILD_NUMBER
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+    )
+else()
+    set(LLAMA_BUILD_COMMIT "unknown")
+    set(LLAMA_BUILD_NUMBER 0)
+endif()
+
+# Set compiler info
+set(BUILD_COMPILER "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
+
+# Set build target
+set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
+
 set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
 set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
 configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
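The git results gathered above feed the configure_file() call at the end of the hunk, which expands common/build-info.cpp.in into the generated build-info.cpp. Roughly, and only as an illustration with invented values (the variable names follow common/build-info.h), the generated file ends up looking like this:

// Illustrative result of configure_file(); values are made up for the example.
int          LLAMA_BUILD_NUMBER = 6123;
const char * LLAMA_COMMIT       = "189f585";
const char * LLAMA_COMPILER     = "Clang 18.0.3";
const char * LLAMA_BUILD_TARGET = "Android aarch64";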

common/common.cpp

Lines changed: 3 additions & 0 deletions
@@ -1229,6 +1229,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     auto mparams = llama_model_default_params();

     if (!params.devices.empty()) {
+        // add nullptr to the end just in case
+        params.devices.push_back(nullptr);
+
         mparams.devices = params.devices.data();
     }
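The extra push_back guards llama_model_params.devices, which llama.cpp treats as a NULL-terminated array: the vector's data() pointer carries no length, so the trailing sentinel is what tells the consumer where the list ends. A minimal, self-contained sketch of that consuming pattern (fake_device and enumerate_devices are illustrative stand-ins, not llama.cpp internals):

#include <cstdio>

// Stand-in for a backend device handle; the real list holds ggml_backend_dev_t.
struct fake_device { const char * name; };

// Walks the array until the nullptr sentinel -- without it this loop would run off the end.
static void enumerate_devices(fake_device * const * devices) {
    for (size_t i = 0; devices[i] != nullptr; ++i) {
        std::printf("device %zu: %s\n", i, devices[i]->name);
    }
}

int main() {
    fake_device gpu = { "gpu0" };
    fake_device cpu = { "cpu" };
    fake_device * devices[] = { &gpu, &cpu, nullptr }; // sentinel-terminated
    enumerate_devices(devices);
}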

common/sampling.cpp

Lines changed: 32 additions & 0 deletions
@@ -63,6 +63,17 @@ struct ring_buffer {
         return value;
     }

+    T pop_back() {
+        if (sz == 0) {
+            throw std::runtime_error("ring buffer is empty");
+        }
+        // Move pos backwards, wrapping around if necessary
+        pos = (pos == 0) ? capacity - 1 : pos - 1;
+        T value = data[pos];
+        sz--;
+        return value;
+    }
+
     const T & rat(size_t i) const {
         if (i >= sz) {
             throw std::runtime_error("ring buffer: index out of bounds");
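pop_back() is the inverse of the existing push_back(): since pos always points at the next write slot, stepping it back one position (wrapping from 0 to capacity - 1) lands on the most recently written element. A standalone toy version of the same arithmetic, shown here only to illustrate the wrap-around (the real template struct lives in common/sampling.cpp):

#include <cassert>
#include <cstdio>
#include <vector>

// Toy ring buffer mirroring the push_back / pop_back index arithmetic.
struct mini_ring {
    std::vector<int> data;
    size_t capacity, sz = 0, first = 0, pos = 0;

    explicit mini_ring(size_t cap) : data(cap), capacity(cap) {}

    void push_back(int v) {
        if (sz == capacity) {
            first = (first + 1) % capacity;  // full: overwrite the oldest element
        } else {
            sz++;
        }
        data[pos] = v;
        pos = (pos + 1) % capacity;          // pos always points at the next write slot
    }

    int pop_back() {
        assert(sz > 0);
        pos = (pos == 0) ? capacity - 1 : pos - 1;  // step back, wrapping at index 0
        sz--;
        return data[pos];
    }
};

int main() {
    mini_ring r(3);
    r.push_back(1); r.push_back(2); r.push_back(3);
    r.push_back(4);            // overwrites 1; pos has wrapped around to slot 1
    int a = r.pop_back();      // 4 -- the most recent push
    int b = r.pop_back();      // 3
    std::printf("%d %d\n", a, b);
}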
@@ -327,6 +338,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
     gsmpl->reset();
 }

+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
+    llama_sampler_reset(gsmpl->grmr);
+
+    gsmpl->grmr = llama_sampler_init_grammar(llama_model_get_vocab(model), grammar, "root");
+}
+
 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
     return new common_sampler {
         /* .params = */ gsmpl->params,
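The new helper resets the sampler's current grammar constraint and installs a fresh one compiled from the model's vocabulary, so a caller can swap grammars mid-session without rebuilding the whole sampler chain. A hedged usage sketch (the GBNF string is just an example; smpl and model are assumed to come from common_sampler_init() and a loaded model):

#include "sampling.h"

// Constrain all subsequent sampling to plain digits by swapping in a new grammar.
static void constrain_to_digits(common_sampler * smpl, const llama_model * model) {
    common_sampler_reinit_grammar(smpl, model, "root ::= [0-9]+");
}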
@@ -542,6 +559,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
     return result;
 }

+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
+    return gsmpl->prev.to_vector();
+}
+
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
+    if(rollback_num > gsmpl->prev.size()) {
+        rollback_num = gsmpl->prev.size();
+    }
+
+    // continuously pop the last token
+    for(int i = 0; i < rollback_num; i++) {
+        gsmpl->prev.pop_back();
+    }
+}
+
 char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
         case COMMON_SAMPLER_TYPE_DRY: return 'd';
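common_sampler_prev() exposes the sampler's accepted-token history and common_sampler_rollback() trims the last N entries from it (clamped to the history size); the intended use is keeping repetition-penalty state consistent when the caller rewinds generation, for example after rejecting draft tokens. Only the sampler history is touched: rewinding the llama_context / KV cache remains the caller's job. A hedged sketch of the sampler-side half:

#include "sampling.h"

#include <vector>

// After deciding to discard the last n_reject sampled tokens, keep the sampler's
// penalty history in step with however the surrounding code rewinds the context.
static void rollback_sampler_history(common_sampler * smpl, int n_reject) {
    common_sampler_rollback(smpl, n_reject); // clamps n_reject to the history size

    // The tokens that survived the rollback:
    std::vector<llama_token> prev = common_sampler_prev(smpl);
    (void) prev; // e.g. log or re-detokenize them here
}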

common/sampling.h

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
 void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
 void common_sampler_reset (struct common_sampler * gsmpl);
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
 struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);

 // arguments can be nullptr to skip printing
@@ -98,6 +99,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);

 // get a string representation of the last accepted tokens
 std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);

 char common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

docs/backend/hexagon/CMakeUserPresets.json

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
             "PREBUILT_LIB_DIR": "android_aarch64",
             "GGML_OPENMP": "OFF",
             "GGML_LLAMAFILE": "OFF",
-            "GGML_OPENCL": "ON",
+            "GGML_OPENCL": "OFF",
             "GGML_HEXAGON": "ON",
             "LLAMA_CURL": "OFF"
         }

ggml/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
@@ -252,6 +252,7 @@ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels"
 option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
 set   (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
                                   "gmml: OpenCL API version to target")
+option(GGML_HEXAGON "ggml: use HEXAGON" OFF)

 option(GGML_HEXAGON "ggml: enable Hexagon backend" OFF)

@@ -319,9 +320,17 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-sycl.h
     include/ggml-vulkan.h
     include/ggml-webgpu.h
+    include/ggml-hexagon.h
     include/gguf.h)

 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
+
+# link android log library
+if(ANDROID)
+    find_library(log-lib log)
+    target_link_libraries(ggml PRIVATE ${log-lib})
+endif()
+
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
@@ -214,6 +214,7 @@ extern "C" {
     //
     // Backend registry
     //
+    GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon, bool useMetal);

     GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
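ggml_backend_reg_layla() has no upstream counterpart; it appears to be a Layla-specific hook for choosing which statically linked backends get registered at runtime rather than registering everything unconditionally. A hedged sketch of how an app might drive it (the flag sources are invented; only the declaration above comes from the commit):

#include "ggml-backend.h"

#include <cstdio>

// Register only the backends the user/device combination can actually use.
static void init_backends(bool user_wants_gpu, bool device_has_hexagon) {
    ggml_backend_reg_layla(
        /* useVulkan  */ user_wants_gpu,
        /* useOpenCL  */ user_wants_gpu,
        /* useHexagon */ device_has_hexagon,
        /* useMetal   */ false);            // not applicable on Android

    std::printf("%zu backend device(s) registered\n", ggml_backend_dev_count());
}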
