Skip to content

Commit fdffcad

Browse files
committed
Refactor and enhance ggml backend implementations
- Updated the AArch64 CPU backend buffer type to use a static structure for better memory management.
- Improved the initialization logic for CPU backend devices, ensuring proper context handling and initialization checks.
- Modified Hexagon backend CMake configuration to remove unnecessary dependencies on external libraries and streamline the build process.
- Added support for dynamic loading of libcdsprpc.so in the Hexagon backend, enhancing compatibility with various environments.
- Introduced new grammar definitions for Chinese, Korean, and scheduling formats to improve language processing capabilities.
- Implemented logging enhancements for Android, allowing for better error tracking and debugging.
- Added Vulkan backend support with pre-generated shader files for cross-compilation, improving performance on Android platforms.
- Cleaned up various source files by removing redundant code and ensuring consistent formatting.
2 parents 235a5f8 + 7ec4aaf commit fdffcad

33 files changed

+486
-166
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ autogen-*.md
7777
!.github/workflows/*.yml
7878

7979
# Models
80-
8180
models/*
8281
models-mnt
8382
!models/.editorconfig

common/common.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
10741074
auto mparams = llama_model_default_params();
10751075

10761076
if (!params.devices.empty()) {
1077+
// add nullptr to the end just in case
1078+
params.devices.push_back(nullptr);
1079+
10771080
mparams.devices = params.devices.data();
10781081
}
10791082

common/minja/chat-template.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#pragma once
1010

1111
#include "minja.hpp"
12-
1312
#include <chrono>
1413
#include <cstddef>
1514
#include <cstdio>
@@ -21,7 +20,6 @@
2120
#include <stdexcept>
2221
#include <string>
2322
#include <vector>
24-
2523
#include <json.hpp>
2624

2725
using json = nlohmann::ordered_json;

common/minja/minja.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
#include <unordered_set>
2929
#include <utility>
3030
#include <vector>
31-
3231
#include <json.hpp>
3332

3433
using json = nlohmann::ordered_json;

common/sampling.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ struct ring_buffer {
6262
return value;
6363
}
6464

65+
T pop_back() {
66+
if (sz == 0) {
67+
throw std::runtime_error("ring buffer is empty");
68+
}
69+
// Move pos backwards, wrapping around if necessary
70+
pos = (pos == 0) ? capacity - 1 : pos - 1;
71+
T value = data[pos];
72+
sz--;
73+
return value;
74+
}
75+
6576
const T & rat(size_t i) const {
6677
if (i >= sz) {
6778
throw std::runtime_error("ring buffer: index out of bounds");
@@ -314,6 +325,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
314325
llama_sampler_reset(gsmpl->chain);
315326
}
316327

328+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
329+
llama_sampler_reset(gsmpl->grmr);
330+
331+
gsmpl->grmr = llama_sampler_init_grammar(llama_model_get_vocab(model), grammar, "root");
332+
}
333+
317334
struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
318335
return new common_sampler {
319336
/* .params = */ gsmpl->params,
@@ -467,6 +484,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
467484
return result;
468485
}
469486

487+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
488+
return gsmpl->prev.to_vector();
489+
}
490+
491+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
492+
if(rollback_num > gsmpl->prev.size()) {
493+
rollback_num = gsmpl->prev.size();
494+
}
495+
496+
// continuously pop the last token
497+
for(int i = 0; i < rollback_num; i++) {
498+
gsmpl->prev.pop_back();
499+
}
500+
}
501+
470502
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
471503
switch (cnstr) {
472504
case COMMON_SAMPLER_TYPE_DRY: return 'd';

common/sampling.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
4343
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
4444
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
4545
void common_sampler_reset (struct common_sampler * gsmpl);
46+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
4647
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
4748

4849
// arguments can be nullptr to skip printing
@@ -96,6 +97,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
9697

9798
// get a string representation of the last accepted tokens
9899
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
100+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
101+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
99102

100103
char common_sampler_type_to_chr(enum common_sampler_type cnstr);
101104
std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

ggml/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,13 @@ set(GGML_PUBLIC_HEADERS
275275
include/gguf.h)
276276

277277
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
278+
279+
# link android log library
280+
if(ANDROID)
281+
find_library(log-lib log)
282+
target_link_libraries(ggml PRIVATE ${log-lib})
283+
endif()
284+
278285
#if (GGML_METAL)
279286
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
280287
#endif()

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ extern "C" {
202202
//
203203
// Backend registry
204204
//
205+
GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon);
205206

206207
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
207208

ggml/src/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,14 @@ add_library(ggml-base
203203
ggml-quants.h
204204
gguf.cpp)
205205

206+
# Search for the 'log' library on Android
207+
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
208+
find_library(log-lib log)
209+
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${log-lib})
210+
211+
target_link_libraries(ggml-base PUBLIC ${GGML_EXTRA_LIBS})
212+
endif()
213+
206214
target_include_directories(ggml-base PRIVATE .)
207215
if (GGML_BACKEND_DL)
208216
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)

ggml/src/ggml-backend-reg.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ struct ggml_backend_reg_entry {
159159
dl_handle_ptr handle;
160160
};
161161

162+
static bool laylaUseVulkan = false;
163+
static bool laylaUseOpenCL = false;
164+
static bool laylaUseHexagon = false;
165+
162166
struct ggml_backend_registry {
163167
std::vector<ggml_backend_reg_entry> backends;
164168
std::vector<ggml_backend_dev_t> devices;
@@ -174,10 +178,14 @@ struct ggml_backend_registry {
174178
register_backend(ggml_backend_sycl_reg());
175179
#endif
176180
#ifdef GGML_USE_VULKAN
177-
register_backend(ggml_backend_vk_reg());
181+
if(laylaUseVulkan) {
182+
register_backend(ggml_backend_vk_reg());
183+
}
178184
#endif
179185
#ifdef GGML_USE_OPENCL
180-
register_backend(ggml_backend_opencl_reg());
186+
if(laylaUseOpenCL) {
187+
register_backend(ggml_backend_opencl_reg());
188+
}
181189
#endif
182190
#ifdef GGML_USE_CANN
183191
register_backend(ggml_backend_cann_reg());
@@ -192,7 +200,9 @@ struct ggml_backend_registry {
192200
register_backend(ggml_backend_kompute_reg());
193201
#endif
194202
#ifdef GGML_USE_HEXAGON
195-
register_backend(ggml_backend_hexagon_reg());
203+
if(laylaUseHexagon) {
204+
register_backend(ggml_backend_hexagon_reg());
205+
}
196206
#endif
197207
#ifdef GGML_USE_CPU
198208
register_backend(ggml_backend_cpu_reg());
@@ -303,6 +313,12 @@ struct ggml_backend_registry {
303313
}
304314
};
305315

316+
void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon) {
317+
laylaUseVulkan = useVulkan;
318+
laylaUseOpenCL = useOpenCL;
319+
laylaUseHexagon = useHexagon;
320+
}
321+
306322
static ggml_backend_registry & get_reg() {
307323
static ggml_backend_registry reg;
308324
return reg;

0 commit comments

Comments
 (0)