Merge branch 'layla-build' into merge

l3utterfly · web-flow · commit bce287c92466 · 2024-11-16T14:50:39.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -73,7 +73,6 @@ autogen-*.md
 !.github/workflows/*.yml
 
 # Models
-
 models/*
 models-mnt
 !models/.editorconfig
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -212,4 +212,4 @@ endif()
 if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
     add_subdirectory(examples)
     add_subdirectory(pocs)
-endif()
+endif()
diff --git a/common/common.cpp b/common/common.cpp
@@ -2099,4 +2099,4 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
     fprintf(stream, "typ_p: %f # default: 1.0\n", sparams.typ_p);
     fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
     fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
-}
+}
diff --git a/common/common.h b/common/common.h
@@ -595,4 +595,4 @@ void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const cha
 
 void yaml_dump_non_result_info(
     FILE * stream, const common_params & params, const llama_context * lctx,
-    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
+    const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
diff --git a/common/sampling.cpp b/common/sampling.cpp
@@ -60,6 +60,17 @@ struct ring_buffer {
         return value;
     }
 
+    // Steps `pos` back by one slot (wrapping from 0 to capacity - 1), shrinks the
+    // element count by one and returns a copy of the element stored in that slot.
+    // Throws std::runtime_error if the buffer currently holds no elements.
+    T pop_back() {
+        if (sz == 0) {
+            throw std::runtime_error("ring buffer is empty");
+        }
+        if (pos == 0) {
+            // wrap around to the last slot
+            pos = capacity - 1;
+        } else {
+            --pos;
+        }
+        T popped = data[pos];
+        --sz;
+        return popped;
+    }
+
     const T & rat(size_t i) const {
         if (i >= sz) {
             throw std::runtime_error("ring buffer: index out of bounds");
@@ -163,15 +174,15 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
 
     llama_sampler_chain_add(result->chain,
             llama_sampler_init_penalties(
-                llama_n_vocab  (model),
-                llama_token_eos(model),
-                llama_token_nl (model),
-                params.penalty_last_n,
-                params.penalty_repeat,
-                params.penalty_freq,
-                params.penalty_present,
-                params.penalize_nl,
-                params.ignore_eos));
+                    llama_n_vocab  (model),
+                    llama_token_eos(model),
+                    llama_token_nl (model),
+                    params.penalty_last_n,
+                    params.penalty_repeat,
+                    params.penalty_freq,
+                    params.penalty_present,
+                    params.penalize_nl,
+                    params.ignore_eos));
 
     if (params.mirostat == 0) {
         for (const auto & cnstr : params.samplers) {
@@ -252,6 +263,16 @@ void common_sampler_reset(struct common_sampler * gsmpl) {
     llama_sampler_reset(gsmpl->chain);
 }
 
+// Replaces the sampler's grammar with a new one built from `grammar`, using "root"
+// as the start rule.
+//
+// gsmpl   - sampler whose grammar sampler (`grmr`) is replaced
+// model   - model passed through to llama_sampler_init_grammar
+// grammar - grammar source text
+//
+// The previous grammar sampler is freed; merely overwriting the pointer would leak
+// it on every re-initialisation.
+void common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar) {
+    // build the replacement first so `grmr` never points at a freed sampler
+    struct llama_sampler * replacement = llama_sampler_init_grammar(model, grammar, "root");
+
+    llama_sampler_free(gsmpl->grmr);
+    gsmpl->grmr = replacement;
+}
+
+// Resets only the grammar sampler of `gsmpl` by calling llama_sampler_reset on it;
+// the sampling chain is not touched here.
+void common_sampler_reset_grammar(struct common_sampler * gsmpl) {
+    auto * grammar_sampler = gsmpl->grmr;
+    llama_sampler_reset(grammar_sampler);
+}
+
 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
     return new common_sampler {
         /* .params = */ gsmpl->params,
@@ -366,6 +387,21 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
     return result;
 }
 
+// Returns a copy of the sampler's `prev` token history as a vector
+// (as produced by the ring buffer's to_vector()).
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl) {
+    const auto & history = gsmpl->prev;
+    return history.to_vector();
+}
+
+// Removes up to `rollback_num` entries from the end of the sampler's `prev` history.
+//
+// gsmpl        - sampler whose history is shortened
+// rollback_num - number of entries to drop; values <= 0 are a no-op and values
+//                larger than the history size are clamped to it
+//
+// Only `prev` is modified here; the sampler chain and grammar are left as they are.
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num) {
+    // Guard before any comparison with the unsigned size: a negative count would
+    // otherwise convert to a huge unsigned value and wipe the whole history.
+    if (rollback_num <= 0) {
+        return;
+    }
+
+    const size_t n_prev    = gsmpl->prev.size();
+    const size_t requested = static_cast<size_t>(rollback_num);
+    const size_t n_drop    = requested < n_prev ? requested : n_prev;
+
+    // pop the last entry n_drop times
+    for (size_t i = 0; i < n_drop; ++i) {
+        gsmpl->prev.pop_back();
+    }
+}
+
 char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
     switch (cnstr) {
         case COMMON_SAMPLER_TYPE_DRY:         return 'd';
diff --git a/common/sampling.h b/common/sampling.h
@@ -43,6 +43,8 @@ void common_sampler_free(struct common_sampler * gsmpl);
 // if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
 void                    common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
 void                    common_sampler_reset (struct common_sampler * gsmpl);
+// replace the grammar sampler with a new one built from `grammar` (start rule "root")
+void                    common_sampler_reinit_grammar(struct common_sampler * gsmpl, const struct llama_model * model, const char * grammar);
+// reset only the grammar sampler, leaving the sampling chain untouched
+void                    common_sampler_reset_grammar(struct common_sampler * gsmpl);
 struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 
 // arguments can be nullptr to skip printing
@@ -75,6 +77,8 @@ std::string common_sampler_print(const struct common_sampler * gsmpl);
 
 // get a string representation of the last accepted tokens
 std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);
+// get a copy of the previously accepted tokens kept by the sampler
+const std::vector<llama_token> common_sampler_prev(common_sampler * gsmpl);
+// drop up to rollback_num entries from the end of the previously accepted tokens
+void common_sampler_rollback(common_sampler * gsmpl, int rollback_num);
 
 char        common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
@@ -234,6 +234,13 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-vulkan.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
+
+# On Android, locate the system `log` library (the result is cached in `log-lib`)
+# and link it privately into the ggml target.
+if(ANDROID)
+    find_library(log-lib log)
+    target_link_libraries(ggml PRIVATE ${log-lib})
+endif()
+
 #if (GGML_METAL)
 #    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
 #endif()
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
@@ -202,6 +202,12 @@ endif()
 
 # ggml
 
+# When targeting Android, look up the 'log' library and append it to
+# GGML_EXTRA_LIBS (the existing contents of the list are preserved).
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+    find_library(log-lib log)
+    set(GGML_EXTRA_LIBS     ${GGML_EXTRA_LIBS}     ${log-lib})
+endif()
+
 add_library(ggml-base
             ../include/ggml.h
             ../include/ggml-alloc.h
diff --git a/ggml/src/ggml-aarch64.c b/ggml/src/ggml-aarch64.c
@@ -126,4 +126,4 @@ size_t quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_
 size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
     UNUSED(quant_weights);
     return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
-}
+}
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
@@ -5244,4 +5244,4 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
     }
 
     return true;
-}
+}
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -161,6 +161,24 @@ static void ggml_print_backtrace(void) {
 }
 #endif
 
+#if defined(__ANDROID__)
+#include <android/log.h>
+
+// Android variant of ggml_abort: formats the message, writes it to the Android log
+// at ANDROID_LOG_ERROR priority under the "GGML" tag as "<file>:<line>: <message>",
+// then calls ggml_print_backtrace() and abort().
+// The formatted message is bounded by the 1024-byte local buffer.
+void ggml_abort(const char * file, int line, const char * fmt, ...) {
+    char text[1024];
+
+    va_list ap;
+    va_start(ap, fmt);
+    vsnprintf(text, sizeof(text), fmt, ap);
+    va_end(ap);
+
+    __android_log_print(ANDROID_LOG_ERROR, "GGML", "%s:%d: %s", file, line, text);
+
+    ggml_print_backtrace();
+    abort();
+}
+#else
 void ggml_abort(const char * file, int line, const char * fmt, ...) {
     fflush(stdout);
 
@@ -176,6 +194,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     ggml_print_backtrace();
     abort();
 }
+#endif
 
 //
 // logging
@@ -550,6 +569,16 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
 
     return file;
 #else
+    // A name that contains no '/' and consists solely of decimal digits is treated
+    // as an already-open file descriptor number: the descriptor is duplicated and
+    // wrapped in a FILE stream. Anything else falls through to fopen() below.
+    if (strchr(fname, '/') == NULL && fname[0] >= '0' && fname[0] <= '9') {
+        char *endptr;
+        long num = strtol(fname, &endptr, 10);
+
+        // Require the whole string to be consumed and the value to fit in an int;
+        // otherwise a plain relative file name such as "model.gguf" would parse as
+        // 0 and silently hand back a stream on a duplicate of stdin.
+        if (*endptr == '\0' && (long) (int) num == num) {
+            int fd = dup((int) num);
+            if (fd >= 0) {
+                FILE *file = fdopen(fd, mode);
+                if (file != NULL) {
+                    return file;
+                }
+                // fdopen failed: do not leak the duplicated descriptor
+                close(fd);
+            }
+        }
+    }
     return fopen(fname, mode);
 #endif
 
diff --git a/grammars/chinese.gbnf b/grammars/chinese.gbnf
@@ -0,0 +1,4 @@
+root          ::= cn-char+ ([ \t\n] cn-char+)*
+cn-char       ::= cjk | punctuation
+cjk           ::= [一-鿿] | [𠀀-𯿽]
+punctuation   ::= [、-〾]
diff --git a/grammars/korean.gbnf b/grammars/korean.gbnf
@@ -0,0 +1,8 @@
+root            ::= conversation+ 
+conversation    ::= assistant-line "\nUSER: "
+assistant-line  ::= kr-string "\n"
+kr-string       ::= kr-char* 
+kr-char         ::= hangul | punctuation | whitespace
+hangul          ::= [가-힣]
+punctuation     ::= [、-〾]
+whitespace      ::= [ \t]
diff --git a/grammars/schedule.gbnf b/grammars/schedule.gbnf
@@ -0,0 +1,3 @@
+root       ::= record
+record     ::= "Event: " string "\n" "Date: " string "\n" "Time: " string "\n"
+string     ::= "" [ -~]* ""
diff --git a/include/llama.h b/include/llama.h
@@ -1250,4 +1250,4 @@ extern "C" {
 }
 #endif
 
-#endif // LLAMA_H
+#endif // LLAMA_H
diff --git a/ios.toolchain.cmake b/ios.toolchain.cmake
@@ -0,0 +1,17 @@
+set(CMAKE_SYSTEM_NAME iOS)
+
+# build all architectures, not only the currently active one
+set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH NO)
+
+# specify which architectures to build for
+set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)")
+
+# you can also choose to build for a specific device
+# set(CMAKE_OSX_ARCHITECTURES "arm64")
+# or for the simulator
+# set(CMAKE_OSX_ARCHITECTURES "x86_64")
+
+set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos;-iphonesimulator")
+
+# you might also want to set the deployment target
+# set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "10.0")
diff --git a/kompute b/kompute
@@ -0,0 +1 @@
+Subproject commit 4565194ed7c32d1d2efa32ceab4d3c6cae006306
diff --git a/prompts/chat-with-layla.txt b/prompts/chat-with-layla.txt
@@ -0,0 +1,5 @@
+Layla is an AI created by Layla Network that is helpful, polite, and to the point. She is here to help User with everyday tasks. Layla's favourite animal is the butterfly because it represents transformation, growth, and beauty.
+
+Conversation between User and an Assistant named "Layla":
+
+User:
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
@@ -2344,4 +2344,4 @@ void llama_perf_sampler_reset(struct llama_sampler * chain) {
     auto * ctx = (struct llama_sampler_chain *) chain->ctx;
 
     ctx->t_sample_us = ctx->n_sample = 0;
-}
+}
diff --git a/src/llama-sampling.h b/src/llama-sampling.h
@@ -45,4 +45,4 @@ struct llama_sampler * llama_sampler_init_dry_testing(
                            float   dry_base,
                          int32_t   dry_allowed_length,
                          int32_t   dry_penalty_last_n,
-  const std::vector<std::vector<llama_token>>& seq_breakers);
+  const std::vector<std::vector<llama_token>>& seq_breakers);
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -22139,4 +22139,4 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
     (void) user_data;
     fputs(text, stderr);
     fflush(stderr);
-}
+}

Original file line number	Diff line number	Diff line change
`@@ -5244,4 +5244,4 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte`
`5244`	`5244`	`}`
`5245`	`5245`
`5246`	`5246`	`return true;`
`5247`		`-}`
	`5247`	`+}`