Skip to content

Commit bce287c

Browse files
authored
Merge branch 'layla-build' into merge
2 parents 772703c + 5d9a182 commit bce287c

21 files changed

+138
-19
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ autogen-*.md
7373
!.github/workflows/*.yml
7474

7575
# Models
76-
7776
models/*
7877
models-mnt
7978
!models/.editorconfig

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,4 +212,4 @@ endif()
212212
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
213213
add_subdirectory(examples)
214214
add_subdirectory(pocs)
215-
endif()
215+
endif()

common/common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2099,4 +2099,4 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20992099
fprintf(stream, "typ_p: %f # default: 1.0\n", sparams.typ_p);
21002100
fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
21012101
fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
2102-
}
2102+
}

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,4 +595,4 @@ void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const cha
595595

596596
void yaml_dump_non_result_info(
597597
FILE * stream, const common_params & params, const llama_context * lctx,
598-
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
598+
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);

common/sampling.cpp

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@ struct ring_buffer {
6060
return value;
6161
}
6262

63+
// Remove the most recently stored element and return a copy of it.
// Throws std::runtime_error when the buffer holds no elements.
T pop_back() {
    if (sz == 0) {
        throw std::runtime_error("ring buffer is empty");
    }

    // Step the cursor back by one slot; from index 0 it wraps to the last slot.
    if (pos == 0) {
        pos = capacity - 1;
    } else {
        pos = pos - 1;
    }

    // One fewer live element; the slot now under `pos` is the one handed back.
    sz--;
    return data[pos];
}
73+
6374
const T & rat(size_t i) const {
6475
if (i >= sz) {
6576
throw std::runtime_error("ring buffer: index out of bounds");
@@ -163,15 +174,15 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
163174

164175
llama_sampler_chain_add(result->chain,
165176
llama_sampler_init_penalties(
166-
llama_n_vocab (model),
167-
llama_token_eos(model),
168-
llama_token_nl (model),
169-
params.penalty_last_n,
170-
params.penalty_repeat,
171-
params.penalty_freq,
172-
params.penalty_present,
173-
params.penalize_nl,
174-
params.ignore_eos));
177+
llama_n_vocab (model),
178+
llama_token_eos(model),
179+
llama_token_nl (model),
180+
params.penalty_last_n,
181+
params.penalty_repeat,
182+
params.penalty_freq,
183+
params.penalty_present,
184+
params.penalize_nl,
185+
params.ignore_eos));
175186

176187
if (params.mirostat == 0) {
177188
for (const auto & cnstr : params.samplers) {
@@ -252,6 +263,16 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
252263
llama_sampler_reset(gsmpl->chain);
253264
}
254265

266+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
267+
llama_sampler_reset(gsmpl->grmr);
268+
269+
gsmpl->grmr = llama_sampler_init_grammar(model, grammar, "root");
270+
}
271+
272+
// Reset only the grammar sampler of `gsmpl` by forwarding it to llama_sampler_reset;
// no other member of `gsmpl` is touched here.
void common_sampler_reset_grammar(struct common_sampler * gsmpl) {
    auto * grammar_sampler = gsmpl->grmr;
    llama_sampler_reset(grammar_sampler);
}
275+
255276
struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
256277
return new common_sampler {
257278
/* .params = */ gsmpl->params,
@@ -366,6 +387,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
366387
return result;
367388
}
368389

390+
// Return a copy of the token history kept in `gsmpl->prev`, as produced by its
// to_vector() helper.
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
    std::vector<llama_token> history = gsmpl->prev.to_vector();
    return history;
}
393+
394+
// Drop the most recent `rollback_num` entries from the token history `gsmpl->prev`.
//
//   gsmpl        - sampler whose history is shortened
//   rollback_num - number of trailing tokens to discard; values larger than the
//                  history length are clamped to it, values <= 0 are a no-op
//
// Only `gsmpl->prev` is modified by this function.
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
    // A negative count used to be converted to a huge unsigned value by the
    // signed/unsigned comparison below and ended up wiping the whole history.
    if (rollback_num <= 0) {
        return;
    }

    // Do the clamp entirely in unsigned arithmetic to avoid mixed-sign comparisons.
    const size_t n_available = gsmpl->prev.size();
    const size_t n_requested = static_cast<size_t>(rollback_num);
    const size_t n_pop       = n_requested < n_available ? n_requested : n_available;

    // pop the newest token, n_pop times
    for (size_t i = 0; i < n_pop; i++) {
        gsmpl->prev.pop_back();
    }
}
404+
369405
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
370406
switch (cnstr) {
371407
case COMMON_SAMPLER_TYPE_DRY: return 'd';

common/sampling.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ void common_sampler_free(struct common_sampler * gsmpl);
4343
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
4444
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
4545
void common_sampler_reset (struct common_sampler * gsmpl);
46+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
47+
void common_sampler_reset_grammar(struct common_sampler * gsmpl);
4648
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
4749

4850
// arguments can be nullptr to skip printing
@@ -75,6 +77,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
7577

7678
// get a string representation of the last accepted tokens
7779
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
80+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
81+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
7882

7983
char common_sampler_type_to_chr(enum common_sampler_type cnstr);
8084
std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

ggml/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,13 @@ set(GGML_PUBLIC_HEADERS
234234
include/ggml-vulkan.h)
235235

236236
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
237+
238+
# link android log library
239+
if(ANDROID)
240+
find_library(log-lib log)
241+
target_link_libraries(ggml PRIVATE ${log-lib})
242+
endif()
243+
237244
#if (GGML_METAL)
238245
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
239246
#endif()

ggml/src/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,12 @@ endif()
202202

203203
# ggml
204204

205+
# Search for the 'log' library on Android
206+
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
207+
find_library(log-lib log)
208+
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${log-lib})
209+
endif()
210+
205211
add_library(ggml-base
206212
../include/ggml.h
207213
../include/ggml-alloc.h

ggml/src/ggml-aarch64.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,4 +126,4 @@ size_t quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_
126126
size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
127127
UNUSED(quant_weights);
128128
return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
129-
}
129+
}

ggml/src/ggml-quants.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5244,4 +5244,4 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
52445244
}
52455245

52465246
return true;
5247-
}
5247+
}

0 commit comments

Comments
 (0)