@@ -1444,6 +1444,24 @@ def llama_get_model_tensor(
     ...
 
 
+# // Returns true if the model contains an encoder that requires llama_encode() call
+# LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
+@ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_model_has_encoder(model: llama_model_p, /) -> bool:
+    """Returns true if the model contains an encoder that requires llama_encode() call"""
+    ...
+
+
+# // For encoder-decoder models, this function returns id of the token that must be provided
+# // to the decoder to start generating output sequence. For other models, it returns -1.
+# LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
+@ctypes_function("llama_model_decoder_start_token", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
+    """For encoder-decoder models, this function returns the id of the token that must be provided
+    to the decoder to start generating the output sequence. For other models, it returns -1."""
+    ...
+
+
 # // Returns 0 on success
 # LLAMA_API uint32_t llama_model_quantize(
 #         const char * fname_inp,
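The two new bindings above are thin wrappers over the C API. Below is a minimal usage sketch, not the library's documented flow: the GGUF path is hypothetical, and the surrounding calls (`llama_backend_init`, `llama_load_model_from_file`, `llama_token_bos`, `llama_free_model`) are the existing low-level helpers.

```python
import llama_cpp

llama_cpp.llama_backend_init()

# Hypothetical path to an encoder-decoder GGUF (e.g. a converted T5 model).
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./t5-small.gguf", params)

if llama_cpp.llama_model_has_encoder(model):
    # Encoder-decoder model: the prompt must first be run through llama_encode().
    start_token = llama_cpp.llama_model_decoder_start_token(model)
    if start_token == -1:
        # No dedicated decoder start token reported; fall back to BOS.
        start_token = llama_cpp.llama_token_bos(model)
    print("decoder start token:", start_token)
else:
    print("decoder-only model: feed the prompt straight to llama_decode()")

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```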
@@ -2271,6 +2289,22 @@ def llama_batch_free(batch: llama_batch, /):
     ...
 
 
+# // Processes a batch of tokens with the encoder part of the encoder-decoder model.
+# // Stores the encoder output internally for later use by the decoder cross-attention layers.
+# //   0 - success
+# // < 0 - error
+# LLAMA_API int32_t llama_encode(
+#         struct llama_context * ctx,
+#         struct llama_batch   batch);
+@ctypes_function("llama_encode", [llama_context_p_ctypes, llama_batch], ctypes.c_int32)
+def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
+    """Processes a batch of tokens with the encoder part of the encoder-decoder model.
+    Stores the encoder output internally for later use by the decoder cross-attention layers.
+    0 - success
+    < 0 - error"""
+    ...
+
+
 # // Positive return values do not mean a fatal error, but rather a warning.
 # //   0 - success
 # //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
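For context, here is a hedged sketch of where `llama_encode` fits in an encoder-decoder generation loop, continuing from the `model` loaded in the earlier sketch. The prompt token IDs are placeholders (a real caller would obtain them from `llama_tokenize`), and the batch is filled by hand via the existing `llama_batch_init` helper.

```python
import llama_cpp

# Create a context for the previously loaded encoder-decoder `model`.
ctx_params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# Placeholder prompt tokens; in practice these come from llama_tokenize().
prompt_tokens = [100, 200, 300]

# Build a single-sequence batch by hand (embd=0, one seq id per token).
batch = llama_cpp.llama_batch_init(len(prompt_tokens), 0, 1)
batch.n_tokens = len(prompt_tokens)
for i, tok in enumerate(prompt_tokens):
    batch.token[i] = tok
    batch.pos[i] = i
    batch.n_seq_id[i] = 1
    batch.seq_id[i][0] = 0
    batch.logits[i] = False

# Run the encoder; its output is cached in the context for the decoder's cross-attention.
if llama_cpp.llama_encode(ctx, batch) != 0:
    raise RuntimeError("llama_encode failed")

# Decoding then starts from the decoder start token (see llama_model_decoder_start_token).
start = llama_cpp.llama_model_decoder_start_token(model)
dec = llama_cpp.llama_batch_init(1, 0, 1)
dec.n_tokens = 1
dec.token[0] = start
dec.pos[0] = 0
dec.n_seq_id[0] = 1
dec.seq_id[0][0] = 0
dec.logits[0] = True  # request logits so the next token can be sampled
if llama_cpp.llama_decode(ctx, dec) != 0:
    raise RuntimeError("llama_decode failed")

llama_cpp.llama_batch_free(batch)
llama_cpp.llama_batch_free(dec)
llama_cpp.llama_free(ctx)
```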