@@ -1444,6 +1444,24 @@ def llama_get_model_tensor(
     ...
 
 
+# // Returns true if the model contains an encoder that requires llama_encode() call
+# LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
+@ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_model_has_encoder(model: llama_model_p, /) -> bool:
+    """Returns true if the model contains an encoder that requires llama_encode() call"""
+    ...
+
+
+# // For encoder-decoder models, this function returns id of the token that must be provided
+# // to the decoder to start generating output sequence. For other models, it returns -1.
+# LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
+@ctypes_function("llama_model_decoder_start_token", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
+    """For encoder-decoder models, this function returns the id of the token that must be provided
+    to the decoder to start generating the output sequence. For other models, it returns -1."""
+    ...
+
+
 # // Returns 0 on success
 # LLAMA_API uint32_t llama_model_quantize(
 #         const char * fname_inp,
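The two new bindings above are thin wrappers over the C API. Below is a minimal usage sketch, not the library's documented flow: the GGUF path is hypothetical, and the surrounding calls (`llama_backend_init`, `llama_load_model_from_file`, `llama_token_bos`, `llama_free_model`) are the existing low-level helpers.

```python
import llama_cpp

llama_cpp.llama_backend_init()

# Hypothetical path to an encoder-decoder GGUF (e.g. a converted T5 model).
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./t5-small.gguf", params)

if llama_cpp.llama_model_has_encoder(model):
    # Encoder-decoder model: the prompt must first be run through llama_encode().
    start_token = llama_cpp.llama_model_decoder_start_token(model)
    if start_token == -1:
        # No dedicated decoder start token reported; fall back to BOS.
        start_token = llama_cpp.llama_token_bos(model)
    print("decoder start token:", start_token)
else:
    print("decoder-only model: feed the prompt straight to llama_decode()")

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```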
@@ -2271,6 +2289,22 @@ def llama_batch_free(batch: llama_batch, /):
     ...
 
 
+# // Processes a batch of tokens with the encoder part of the encoder-decoder model.
+# // Stores the encoder output internally for later use by the decoder cross-attention layers.
+# //   0 - success
+# // < 0 - error
+# LLAMA_API int32_t llama_encode(
+#         struct llama_context * ctx,
+#         struct llama_batch   batch);
+@ctypes_function("llama_encode", [llama_context_p_ctypes, llama_batch], ctypes.c_int32)
+def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
+    """Processes a batch of tokens with the encoder part of the encoder-decoder model.
+    Stores the encoder output internally for later use by the decoder cross-attention layers.
+    0 - success
+    < 0 - error"""
+    ...
+
+
 # // Positive return values do not mean a fatal error, but rather a warning.
 # //   0 - success
 # //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
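For context, here is a hedged sketch of where `llama_encode` fits in an encoder-decoder generation loop, continuing from the `model` loaded in the earlier sketch. The prompt token IDs are placeholders (a real caller would obtain them from `llama_tokenize`), and the batch is filled by hand via the existing `llama_batch_init` helper.

```python
import llama_cpp

# Create a context for the previously loaded encoder-decoder `model`.
ctx_params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# Placeholder prompt tokens; in practice these come from llama_tokenize().
prompt_tokens = [100, 200, 300]

# Build a single-sequence batch by hand (embd=0, one seq id per token).
batch = llama_cpp.llama_batch_init(len(prompt_tokens), 0, 1)
batch.n_tokens = len(prompt_tokens)
for i, tok in enumerate(prompt_tokens):
    batch.token[i] = tok
    batch.pos[i] = i
    batch.n_seq_id[i] = 1
    batch.seq_id[i][0] = 0
    batch.logits[i] = False

# Run the encoder; its output is cached in the context for the decoder's cross-attention.
if llama_cpp.llama_encode(ctx, batch) != 0:
    raise RuntimeError("llama_encode failed")

# Decoding then starts from the decoder start token (see llama_model_decoder_start_token).
start = llama_cpp.llama_model_decoder_start_token(model)
dec = llama_cpp.llama_batch_init(1, 0, 1)
dec.n_tokens = 1
dec.token[0] = start
dec.pos[0] = 0
dec.n_seq_id[0] = 1
dec.seq_id[0][0] = 0
dec.logits[0] = True  # request logits so the next token can be sampled
if llama_cpp.llama_decode(ctx, dec) != 0:
    raise RuntimeError("llama_decode failed")

llama_cpp.llama_batch_free(batch)
llama_cpp.llama_batch_free(dec)
llama_cpp.llama_free(ctx)
```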