Commit 6c87864

Merge branch 'layla-build' into merge

2 parents: 4dd34ff + 4b5b503

30 files changed: 746,514 additions and 132 deletions

.gitignore

Lines changed: 0 additions & 1 deletion

```diff
@@ -75,7 +75,6 @@ autogen-*.md
 !.github/workflows/*.yml
 
 # Models
-
 models/*
 models-mnt
 !models/.editorconfig
```

CMakeLists.txt

Lines changed: 18 additions & 0 deletions

```diff
@@ -243,3 +243,21 @@ configure_file(cmake/llama.pc.in
 
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
         DESTINATION lib/pkgconfig)
+
+#
+# utils, programs, examples and tests
+#
+
+if (LLAMA_BUILD_COMMON)
+    add_subdirectory(common)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    include(CTest)
+    add_subdirectory(tests)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+    add_subdirectory(pocs)
+endif()
```

common/common.cpp

Lines changed: 3 additions & 1 deletion

```diff
@@ -1038,6 +1038,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     auto mparams = llama_model_default_params();
 
     if (!params.devices.empty()) {
+        // add nullptr to the end just in case
+        params.devices.push_back(nullptr);
+
         mparams.devices = params.devices.data();
     }
     if (params.n_gpu_layers != -1) {
@@ -2072,4 +2075,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
 
     return result;
 }
-
```
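
The new lines append a trailing `nullptr` before the vector's data pointer is handed to `mparams.devices`, which suggests the receiving side treats the list as a nullptr-terminated array. A self-contained sketch of that convention (the `fake_dev_t` alias and `enumerate_devices` helper are illustrative stand-ins, not llama.cpp API):

```cpp
#include <cstdio>
#include <vector>

using fake_dev_t = const char *; // stand-in for ggml_backend_dev_t

static void enumerate_devices(fake_dev_t * devices) {
    // the consumer walks the array until it hits the sentinel
    for (size_t i = 0; devices[i] != nullptr; i++) {
        printf("device %zu: %s\n", i, devices[i]);
    }
}

int main() {
    std::vector<fake_dev_t> devices = { "Vulkan0", "CPU" };
    devices.push_back(nullptr); // the "just in case" terminator from this patch
    enumerate_devices(devices.data());
}
```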

common/sampling.cpp

Lines changed: 32 additions & 0 deletions

```diff
@@ -60,6 +60,17 @@ struct ring_buffer {
         return value;
     }
 
+    T pop_back() {
+        if (sz == 0) {
+            throw std::runtime_error("ring buffer is empty");
+        }
+        // Move pos backwards, wrapping around if necessary
+        pos = (pos == 0) ? capacity - 1 : pos - 1;
+        T value = data[pos];
+        sz--;
+        return value;
+    }
+
     const T & rat(size_t i) const {
         if (i >= sz) {
             throw std::runtime_error("ring buffer: index out of bounds");
```
@@ -248,6 +259,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
     llama_sampler_reset(gsmpl->chain);
 }
 
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
+    llama_sampler_reset(gsmpl->grmr);
+
+    gsmpl->grmr = llama_sampler_init_grammar(model, grammar, "root");
+}
+
 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
     return new common_sampler {
         /* .params = */ gsmpl->params,
```
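```

The new `common_sampler_reinit_grammar` resets the current grammar sampler and then replaces it with one freshly built from the given grammar string, rooted at `"root"`. A hedged usage sketch (the wrapper function and the GBNF string below are hypothetical, not part of this commit):

```cpp
#include "sampling.h"

// Hypothetical helper: swap to a different constraint grammar mid-session
// without rebuilding the whole sampler chain. `gsmpl` and `model` are assumed
// to come from the usual common_sampler_init / model-loading calls elsewhere.
static void constrain_to_yes_no(common_sampler * gsmpl, const llama_model * model) {
    const char * grammar = "root ::= \"yes\" | \"no\""; // placeholder GBNF
    common_sampler_reinit_grammar(gsmpl, model, grammar);
}
```

```diff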
@@ -401,6 +418,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
     return result;
 }
 
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
+    return gsmpl->prev.to_vector();
+}
+
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
+    if(rollback_num > gsmpl->prev.size()) {
+        rollback_num = gsmpl->prev.size();
+    }
+
+    // continuously pop the last token
+    for(int i = 0; i < rollback_num; i++) {
+        gsmpl->prev.pop_back();
+    }
+}
+
 char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
         case COMMON_SAMPLER_TYPE_DRY: return 'd';
```
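
`common_sampler_prev` exposes the accepted-token history as a vector, and `common_sampler_rollback` unwinds the last `rollback_num` entries by repeatedly calling the new `pop_back`, after clamping the count to the history size. A hedged usage sketch (the helper is hypothetical; note this rewinds only the sampler's history, so any KV-cache positions would need to be cleared separately):

```cpp
#include "sampling.h"

#include <cstdio>
#include <vector>

// Hypothetical helper: discard the last n accepted tokens, e.g. after a
// rejected speculative draft or an aborted partial generation.
static void reject_last_tokens(common_sampler * gsmpl, int n_reject) {
    const std::vector<llama_token> history = common_sampler_prev(gsmpl);
    printf("history before rollback: %zu tokens\n", history.size());

    // safe even if n_reject exceeds the history size: rollback clamps it
    common_sampler_rollback(gsmpl, n_reject);
}
```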

common/sampling.h

Lines changed: 3 additions & 0 deletions

```diff
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
 void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
 void common_sampler_reset (struct common_sampler * gsmpl);
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
 struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 
 // arguments can be nullptr to skip printing
@@ -96,6 +97,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
 
 // get a string representation of the last accepted tokens
 std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
 
 char common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
```

examples/llava/clip.cpp

Lines changed: 4 additions & 0 deletions

```diff
@@ -1111,6 +1111,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 
 // read and create ggml_context containing the tensors and their data
 struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+    throw new std::runtime_error("Not implemented");
+
     struct ggml_context * meta = NULL;
 
     struct gguf_init_params params = {
@@ -2444,6 +2446,8 @@ bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f3
 }
 
 bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs, float * vec) {
+    throw new std::runtime_error("Not implemented");
+
     if (!ctx->has_vision_encoder) {
         LOG_ERR("This gguf file seems to have no vision encoder\n");
         return false;
```
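```

A C++ detail worth noting in these stubs: `throw new std::runtime_error(...)` throws a pointer (`std::runtime_error *`), not an exception object, so a conventional `catch (const std::exception &)` handler will not see it. A self-contained illustration of the difference:

```cpp
#include <cstdio>
#include <stdexcept>

int main() {
    try {
        throw std::runtime_error("Not implemented"); // idiomatic: throw by value
    } catch (const std::exception & e) {
        printf("caught by reference: %s\n", e.what());
    }

    try {
        throw new std::runtime_error("Not implemented"); // throws a pointer
    } catch (const std::exception & e) {
        printf("never reached\n");
    } catch (std::runtime_error * e) {
        // only a pointer-typed handler matches, and the catcher must delete
        printf("caught pointer: %s\n", e->what());
        delete e;
    }
}
```

The same pattern appears in the llava.cpp stub below.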

examples/llava/llava.cpp

Lines changed: 2 additions & 0 deletions

```diff
@@ -101,6 +101,8 @@ static struct clip_image_grid_shape get_anyres_image_grid_shape(const std::pair<
 
 // Take the image segments in a grid configuration and return the embeddings and the number of embeddings into preallocated memory (image_embd_out)
 static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *> & image_embd_v, struct clip_image_grid_shape grid_shape, float * image_embd_out, int * n_img_pos_out) {
+    throw new std::runtime_error("Not implemented");
+
     struct {
         struct ggml_context * ctx;
     } model;
```

ggml/CMakeLists.txt

Lines changed: 7 additions & 0 deletions

```diff
@@ -250,6 +250,13 @@ set(GGML_PUBLIC_HEADERS
     include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
+
+# link android log library
+if(ANDROID)
+    find_library(log-lib log)
+    target_link_libraries(ggml PRIVATE ${log-lib})
+endif()
+
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()
```

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions

```diff
@@ -202,6 +202,7 @@ extern "C" {
     //
     // Backend registry
     //
+    GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL);
 
     GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
 
```
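
Only the declaration appears in this diff, so the behavior is inferred from the name and parameters: the entry point apparently lets the embedding app (Layla) choose which GPU backends to register up front. A minimal hedged call site, assuming it should run once before any model or context is created:

```cpp
#include "ggml-backend.h"

int main() {
    // assumption based on the signature alone: select backends at startup,
    // enabling Vulkan and skipping OpenCL in this sketch
    ggml_backend_reg_layla(/*useVulkan =*/ true, /*useOpenCL =*/ false);
    return 0;
}
```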

ggml/src/CMakeLists.txt

Lines changed: 8 additions & 0 deletions

```diff
@@ -219,6 +219,14 @@ add_library(ggml-base
     ggml-quants.h
     gguf.cpp)
 
+# Search for the 'log' library on Android
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+    find_library(log-lib log)
+    set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${log-lib})
+
+    target_link_libraries(ggml-base PUBLIC ${GGML_EXTRA_LIBS})
+endif()
+
 target_include_directories(ggml-base PRIVATE .)
 
 add_library(ggml
```
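```

This fragment and the one in ggml/CMakeLists.txt exist for the same reason: on Android, the NDK logging functions live in liblog, so any code that calls them must link it explicitly. A minimal sketch of the API that presumably motivates the dependency (requires an NDK toolchain; the tag string is arbitrary):

```cpp
#include <android/log.h>

// __android_log_print comes from liblog; without linking `log`, the build
// fails at link time with an undefined-reference error.
void log_backend_ready(const char * name) {
    __android_log_print(ANDROID_LOG_INFO, "ggml", "backend ready: %s", name);
}
```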
