Skip to content

Commit 648f244

Browse files
authored
Merge branch 'layla-build' into merge
2 parents 06c2b15 + be6c8d6 commit 648f244

34 files changed

+1668135
-163
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ autogen-*.md
7777
!.github/workflows/*.yml
7878

7979
# Models
80-
8180
models/*
8281
models-mnt
8382
!models/.editorconfig

common/common.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
10801080
auto mparams = llama_model_default_params();
10811081

10821082
if (!params.devices.empty()) {
1083+
// add nullptr to the end just in case
1084+
params.devices.push_back(nullptr);
1085+
10831086
mparams.devices = params.devices.data();
10841087
}
10851088
if (params.n_gpu_layers != -1) {
@@ -2025,4 +2028,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
20252028

20262029
return result;
20272030
}
2028-

common/minja/chat-template.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "minja.hpp"
12-
#include <json.hpp>
12+
#include "json.hpp"
1313
#include <string>
1414
#include <vector>
1515

common/minja/minja.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include <stdexcept>
1717
#include <sstream>
1818
#include <unordered_set>
19-
#include <json.hpp>
19+
#include "json.hpp"
2020

2121
using json = nlohmann::ordered_json;
2222

common/sampling.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@ struct ring_buffer {
6060
return value;
6161
}
6262

63+
T pop_back() {
    // Remove and return the most recently pushed element.
    // Throws if the buffer holds no elements.
    if (sz == 0) {
        throw std::runtime_error("ring buffer is empty");
    }
    // Step the write cursor back one slot, wrapping at the start of the
    // storage; it then points at the last value that was pushed.
    pos = (pos + capacity - 1) % capacity;
    T value = data[pos];
    sz--;
    return value;
}
73+
6374
const T & rat(size_t i) const {
6475
if (i >= sz) {
6576
throw std::runtime_error("ring buffer: index out of bounds");
@@ -275,6 +286,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
275286
llama_sampler_reset(gsmpl->chain);
276287
}
277288

289+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
290+
llama_sampler_reset(gsmpl->grmr);
291+
292+
gsmpl->grmr = llama_sampler_init_grammar(llama_model_get_vocab(model), grammar, "root");
293+
}
294+
278295
struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
279296
return new common_sampler {
280297
/* .params = */ gsmpl->params,
@@ -428,6 +445,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
428445
return result;
429446
}
430447

448+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
    // Materialize the sampler's accepted-token history as a plain vector.
    auto history = gsmpl->prev.to_vector();
    return history;
}
451+
452+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
453+
if(rollback_num > gsmpl->prev.size()) {
454+
rollback_num = gsmpl->prev.size();
455+
}
456+
457+
// continuously pop the last token
458+
for(int i = 0; i < rollback_num; i++) {
459+
gsmpl->prev.pop_back();
460+
}
461+
}
462+
431463
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
432464
switch (cnstr) {
433465
case COMMON_SAMPLER_TYPE_DRY: return 'd';

common/sampling.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
4343
// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
4444
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
4545
void common_sampler_reset (struct common_sampler * gsmpl);
46+
void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
4647
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
4748

4849
// arguments can be nullptr to skip printing
@@ -96,6 +97,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
9697

9798
// get a string representation of the last accepted tokens
9899
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
100+
const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
101+
void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
99102

100103
char common_sampler_type_to_chr(enum common_sampler_type cnstr);
101104
std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

examples/llava/llava.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "llava.h"
33

44
#include "llama.h"
5+
#include "llama-impl.h"
56

67
#include <algorithm>
78
#include <cerrno>
@@ -17,10 +18,10 @@
1718
# define LOG_ERR(...)
1819
# define LOG_DBG(...)
1920
#else // defined(LLAVA_LOG_OFF)
20-
# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
21-
# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
22-
# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
23-
# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
21+
# define LOG_INF(...) LLAMA_LOG_INFO(__VA_ARGS__)
22+
# define LOG_WRN(...) LLAMA_LOG_WARN(__VA_ARGS__)
23+
# define LOG_ERR(...) LLAMA_LOG_ERROR(__VA_ARGS__)
24+
# define LOG_DBG(...) LLAMA_LOG_DEBUG(__VA_ARGS__)
2425
#endif // defined(LLAVA_LOG_OFF)
2526

2627
// RGB uint8 image

ggml/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,13 @@ set(GGML_PUBLIC_HEADERS
257257
include/gguf.h)
258258

259259
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
260+
261+
# link android log library
262+
if(ANDROID)
263+
find_library(log-lib log)
264+
target_link_libraries(ggml PRIVATE ${log-lib})
265+
endif()
266+
260267
#if (GGML_METAL)
261268
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
262269
#endif()

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ extern "C" {
202202
//
203203
// Backend registry
204204
//
205+
GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL);
205206

206207
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
207208

ggml/src/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ add_library(ggml-base
225225
ggml-quants.h
226226
gguf.cpp)
227227

228+
# Search for the 'log' library on Android
229+
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
230+
find_library(log-lib log)
231+
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${log-lib})
232+
233+
target_link_libraries(ggml-base PUBLIC ${GGML_EXTRA_LIBS})
234+
endif()
235+
228236
target_include_directories(ggml-base PRIVATE .)
229237

230238
add_library(ggml

0 commit comments

Comments
 (0)