l3utterfly
diff --git a/‎.gitignore‎
Lines changed: 0 additions & 1 deletion b/‎.gitignore‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 18 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎common/common.cpp‎
Lines changed: 3 additions & 1 deletion b/‎common/common.cpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎common/sampling.cpp‎
Lines changed: 32 additions & 0 deletions b/‎common/sampling.cpp‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎common/sampling.h‎
Lines changed: 3 additions & 0 deletions b/‎common/sampling.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎examples/llava/clip.cpp‎
Lines changed: 4 additions & 0 deletions b/‎examples/llava/clip.cpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎examples/llava/llava.cpp‎
Lines changed: 2 additions & 0 deletions b/‎examples/llava/llava.cpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/CMakeLists.txt‎
Lines changed: 7 additions & 0 deletions b/‎ggml/CMakeLists.txt‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎ggml/include/ggml-backend.h‎
Lines changed: 1 addition & 0 deletions b/‎ggml/include/ggml-backend.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎ggml/src/CMakeLists.txt‎
Lines changed: 8 additions & 0 deletions b/‎ggml/src/CMakeLists.txt‎
Lines changed: 8 additions & 0 deletions
@@ -75,7 +75,6 @@ autogen-*.md
 !.github/workflows/*.yml
 
 # Models
-
 models/*
 models-mnt
 !models/.editorconfig
 
@@ -243,3 +243,21 @@ configure_file(cmake/llama.pc.in
 
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
         DESTINATION lib/pkgconfig)
+
+#
+# utils, programs, examples and tests
+#
+
+# the common library is required by both the tests and the examples below
+if (LLAMA_BUILD_COMMON)
+    add_subdirectory(common)
+endif()
+
+# tests are skipped whenever CMAKE_JS_VERSION is set
+# NOTE(review): presumably this is set when building through cmake-js - confirm
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    include(CTest)
+    add_subdirectory(tests)
+endif()
+
+# examples and pocs are enabled together by LLAMA_BUILD_EXAMPLES
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+    add_subdirectory(pocs)
+endif()
@@ -1038,6 +1038,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     auto mparams = llama_model_default_params();
 
     if (!params.devices.empty()) {
+        // add nullptr to the end just in case
+        params.devices.push_back(nullptr);
+
         mparams.devices = params.devices.data();
     }
     if (params.n_gpu_layers != -1) {
@@ -2072,4 +2075,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
 
     return result;
 }
-
 
@@ -60,6 +60,17 @@ struct ring_buffer {
         return value;
     }
 
+    // Drop the element at the back of the buffer and return a copy of it.
+    // Throws std::runtime_error if the buffer holds no elements.
+    T pop_back() {
+        if (sz == 0) {
+            throw std::runtime_error("ring buffer is empty");
+        }
+
+        // step pos back by one slot; slot 0 wraps around to the last slot
+        if (pos == 0) {
+            pos = capacity - 1;
+        } else {
+            pos -= 1;
+        }
+
+        T popped = data[pos];
+        --sz;
+
+        return popped;
+    }
+
     const T & rat(size_t i) const {
         if (i >= sz) {
             throw std::runtime_error("ring buffer: index out of bounds");
@@ -248,6 +259,12 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
     llama_sampler_reset(gsmpl->chain);
 }
 
+// Replace the grammar sampler of gsmpl with a new one built from `grammar`,
+// using "root" as the start rule. The previous grammar sampler is released.
+// The sampling chain (gsmpl->chain) is not touched by this call.
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
+    // build the replacement first so gsmpl->grmr never points at a freed sampler
+    auto * grmr_new = llama_sampler_init_grammar(model, grammar, "root");
+
+    // the old sampler used to be reset and then overwritten, which leaked it on every call;
+    // resetting is pointless for an object that is about to be destroyed, so just free it
+    llama_sampler_free(gsmpl->grmr);
+
+    gsmpl->grmr = grmr_new;
+}
+
 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
     return new common_sampler {
         /* .params = */ gsmpl->params,
@@ -401,6 +418,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
     return result;
 }
 
+// Return the sampler's token history (gsmpl->prev) as a vector, by value.
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
+    auto & history = gsmpl->prev;
+
+    return history.to_vector();
+}
+
+// Drop up to rollback_num tokens from the end of the sampler's token history (gsmpl->prev).
+// The count is clamped to the number of stored tokens; zero or negative counts are a no-op.
+// Only the history buffer is modified here - the sampler chain and the grammar are not rewound.
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
+    // a negative count used to go through a mixed signed/unsigned comparison against
+    // prev.size(), got clamped to the full size and wiped the entire history
+    if (rollback_num <= 0) {
+        return;
+    }
+
+    const size_t n_stored = gsmpl->prev.size();
+    const size_t n_wanted = static_cast<size_t>(rollback_num);
+    const size_t n_pop    = n_wanted < n_stored ? n_wanted : n_stored;
+
+    // pop the last token n_pop times
+    for (size_t i = 0; i < n_pop; i++) {
+        gsmpl->prev.pop_back();
+    }
+}
+
 char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
         case COMMON_SAMPLER_TYPE_DRY:         return 'd';
 
@@ -43,6 +43,7 @@ void common_sampler_free(struct common_sampler * gsmpl);
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
 void                    common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
 void                    common_sampler_reset (struct common_sampler * gsmpl);
+// replace the grammar sampler with a new one built from the given grammar string (the start rule is "root")
+void                    common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
 struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 
 // arguments can be nullptr to skip printing
@@ -96,6 +97,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
 
 // get a string representation of the last accepted tokens
 std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
+// get a copy of the token history stored in the sampler
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
+// remove up to rollback_num tokens from the end of the token history (clamped to the history size)
+// NOTE(review): only the history buffer is modified - the sampler chain and grammar state are left as-is
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
 
 char        common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
 
@@ -1111,6 +1111,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 
 // read and create ggml_context containing the tensors and their data
 struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+    throw new std::runtime_error("Not implemented");
+
     struct ggml_context * meta = NULL;
 
     struct gguf_init_params params = {
@@ -2444,6 +2446,8 @@ bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f3
 }
 
 bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs, float * vec) {
+    throw new std::runtime_error("Not implemented");
+
     if (!ctx->has_vision_encoder) {
         LOG_ERR("This gguf file seems to have no vision encoder\n");
         return false;
 
@@ -101,6 +101,8 @@ static struct clip_image_grid_shape get_anyres_image_grid_shape(const std::pair<
 
 // Take the image segments in a grid configuration and return the embeddings and the number of embeddings into preallocated memory (image_embd_out)
 static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *> & image_embd_v, struct clip_image_grid_shape grid_shape, float * image_embd_out, int * n_img_pos_out) {
+    throw new std::runtime_error("Not implemented");
+
     struct {
         struct ggml_context * ctx;
     } model;
 
@@ -250,6 +250,13 @@ set(GGML_PUBLIC_HEADERS
     include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
+
+# link android log library
+# find_library stores the lookup result in the log-lib cache variable;
+# nothing is looked up or linked when ANDROID is not set
+if(ANDROID)
+    find_library(log-lib log)
+    target_link_libraries(ggml PRIVATE ${log-lib})
+endif()
+
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()
 
@@ -202,6 +202,7 @@ extern "C" {
     //
     // Backend registry
     //
+    // NOTE(review): entry point added by this patch; its definition is not part of this header.
+    // Presumably the flags select whether the Vulkan / OpenCL backends get registered - confirm.
+    GGML_API void               ggml_backend_reg_layla(bool useVulkan, bool useOpenCL);
 
     GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
 
 
@@ -219,6 +219,14 @@ add_library(ggml-base
             ggml-quants.h
             gguf.cpp)
 
+# Search for the 'log' library on Android
+# The result is appended to GGML_EXTRA_LIBS and the whole list is linked PUBLIC,
+# so targets that link ggml-base also get these libraries on their link line.
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+    find_library(log-lib log)
+    set(GGML_EXTRA_LIBS     ${GGML_EXTRA_LIBS}     ${log-lib})
+
+    target_link_libraries(ggml-base PUBLIC ${GGML_EXTRA_LIBS})
+endif()
+
 target_include_directories(ggml-base PRIVATE .)
 
 add_library(ggml
Original file line number	Diff line number	Diff line change
`@@ -1038,6 +1038,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {`
`1038`	`1038`	`auto mparams = llama_model_default_params();`
`1039`	`1039`
`1040`	`1040`	`if (!params.devices.empty()) {`
	`1041`	`+ // add nullptr to the end just in case`
	`1042`	`+ params.devices.push_back(nullptr);`
	`1043`	`+`
`1041`	`1044`	`mparams.devices = params.devices.data();`
`1042`	`1045`	`}`
`1043`	`1046`	`if (params.n_gpu_layers != -1) {`
`@@ -2072,4 +2075,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c`
`2072`	`2075`
`2073`	`2076`	`return result;`
`2074`	`2077`	`}`
`2075`		`-`