@@ -233,9 +233,6 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # define LLAMA_DEFAULT_SEED 0xFFFFFFFF
 LLAMA_DEFAULT_SEED = 0xFFFFFFFF
 
-# define LLAMA_MAX_RNG_STATE (64*1024)
-LLAMA_MAX_RNG_STATE = 64 * 1024
-
 # define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 LLAMA_FILE_MAGIC_GGLA = 0x67676C61
 
@@ -247,13 +244,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 
 # define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
-# define LLAMA_SESSION_VERSION 7
-LLAMA_SESSION_VERSION = 7
+# define LLAMA_SESSION_VERSION 8
+LLAMA_SESSION_VERSION = 8
 
 # define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
 LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ
-# define LLAMA_STATE_SEQ_VERSION 1
-LLAMA_STATE_SEQ_VERSION = 1
+# define LLAMA_STATE_SEQ_VERSION 2
+LLAMA_STATE_SEQ_VERSION = 2
 
 # struct llama_model;
 llama_model_p = NewType("llama_model_p", int)
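The session and sequence-state version bumps matter for on-disk files: a session saved by a build using LLAMA_SESSION_VERSION 7 will be rejected by a build expecting 8. A minimal pre-flight check, as a sketch only: it assumes the session file begins with a little-endian uint32 magic followed by a uint32 version (the layout llama_state_save_file uses), and session_file_is_compatible is a hypothetical helper name.

import struct

import llama_cpp


def session_file_is_compatible(path: str) -> bool:
    # Hypothetical helper: read the assumed 8-byte header (uint32 magic,
    # uint32 version) and compare against the constants this build expects.
    with open(path, "rb") as f:
        header = f.read(8)
    if len(header) < 8:
        return False
    magic, version = struct.unpack("<II", header)
    return (
        magic == llama_cpp.LLAMA_SESSION_MAGIC
        and version == llama_cpp.LLAMA_SESSION_VERSION
    )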
@@ -1583,7 +1580,7 @@ def llama_lora_adapter_set(
     ...
 
 
-# // Remove a LoRA adapter from given context
+# // Remove a specific LoRA adapter from given context
 # // Return -1 if the adapter is not present in the context
 # LLAMA_API int32_t llama_lora_adapter_remove(
 #         struct llama_context * ctx,
@@ -1601,6 +1598,19 @@ def llama_lora_adapter_remove(
     ...
 
 
+# // Remove all LoRA adapters from given context
+# LLAMA_API void llama_lora_adapter_clear(
+#         struct llama_context * ctx);
+@ctypes_function(
+    "llama_lora_adapter_clear",
+    [llama_context_p_ctypes],
+    None,
+)
+def llama_lora_adapter_clear(ctx: llama_context_p, /):
+    """Remove all LoRA adapters from given context"""
+    ...
+
+
 # // Manually free a LoRA adapter
 # // Note: loaded adapters will be free when the associated model is deleted
 # LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
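Since llama_lora_adapter_clear drops every active adapter at once, a hot-swap no longer needs to track which adapters were previously applied and remove them one by one. A sketch of that pattern, assuming ctx and new_adapter come from llama_new_context_with_model and llama_lora_adapter_init elsewhere in the program; swap_adapter is a hypothetical helper.

import llama_cpp


def swap_adapter(ctx, new_adapter, scale: float = 1.0) -> None:
    # Hypothetical helper: drop all currently applied adapters, then apply
    # the new one at the given scale.
    llama_cpp.llama_lora_adapter_clear(ctx)
    if llama_cpp.llama_lora_adapter_set(ctx, new_adapter, scale) != 0:
        raise RuntimeError("failed to set LoRA adapter")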
@@ -1992,17 +2002,17 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
 # //
 
 
-# Returns the maximum size in bytes of the state (rng, logits, embedding
-# and kv_cache) - will often be smaller after compacting tokens
-# LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
+# // Returns the *actual* size in bytes of the state
+# // (rng, logits, embedding and kv_cache)
+# // Only use when saving the state, not when restoring it, otherwise the size may be too small.
+# LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
 @ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_state_get_size(ctx: llama_context_p, /) -> int:
-    """Returns the maximum size in bytes of the state (rng, logits, embedding
-    and kv_cache) - will often be smaller after compacting tokens"""
+    """Returns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokens"""
     ...
 
 
-# LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx),
+# LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
 #                      "use llama_state_get_size instead");
 @ctypes_function("llama_get_state_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_get_state_size(ctx: llama_context_p, /) -> int:
@@ -2011,22 +2021,27 @@ def llama_get_state_size(ctx: llama_context_p, /) -> int:
     ...
 
 
-# Copies the state to the specified destination address.
-# Destination needs to have allocated enough memory.
-# Returns the number of bytes copied
+# // Copies the state to the specified destination address.
+# // Destination needs to have allocated enough memory.
+# // Returns the number of bytes copied
 # LLAMA_API size_t llama_state_get_data(
 #         struct llama_context * ctx,
-#         uint8_t * dst);
+#         uint8_t * dst,
+#         size_t size);
 @ctypes_function(
     "llama_state_get_data",
     [
         llama_context_p_ctypes,
         ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
     ],
     ctypes.c_size_t,
 )
 def llama_state_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
@@ -2059,14 +2074,18 @@ def llama_copy_state_data(
 # // Returns the number of bytes read
 # LLAMA_API size_t llama_state_set_data(
 #         struct llama_context * ctx,
-#         const uint8_t * src);
+#         const uint8_t * src,
+#         size_t size);
 @ctypes_function(
     "llama_state_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
+    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), ctypes.c_size_t],
     ctypes.c_size_t,
 )
 def llama_state_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Set the state reading from the specified address
     Returns the number of bytes read"""
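With the new size parameter the caller states the buffer's capacity explicitly instead of the library trusting a bare pointer, so the natural round trip is: size the buffer with llama_state_get_size (valid only at save time, per the comment above), fill it with llama_state_get_data, and feed the same length back into llama_state_set_data. A sketch, assuming ctx is a live llama_context_p created elsewhere; save_state and restore_state are hypothetical helpers.

import ctypes

import llama_cpp


def save_state(ctx) -> bytes:
    # Size the staging buffer at save time, then copy the state into it.
    n = llama_cpp.llama_state_get_size(ctx)
    buf = (ctypes.c_uint8 * n)()
    written = llama_cpp.llama_state_get_data(ctx, buf, n)
    return bytes(buf[:written])


def restore_state(ctx, blob: bytes) -> None:
    # Copy the snapshot into a ctypes buffer and hand over its exact length.
    buf = (ctypes.c_uint8 * len(blob)).from_buffer_copy(blob)
    read = llama_cpp.llama_state_set_data(ctx, buf, len(blob))
    if read != len(blob):
        raise RuntimeError(f"restore consumed {read} of {len(blob)} bytes")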
@@ -2216,14 +2235,24 @@ def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> i
 # LLAMA_API size_t llama_state_seq_get_data(
 #         struct llama_context * ctx,
 #         uint8_t * dst,
+#         size_t size,
 #         llama_seq_id seq_id);
 @ctypes_function(
     "llama_state_seq_get_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the KV cache of a single sequence into the specified buffer"""
     ...
@@ -2236,14 +2265,24 @@ def llama_state_seq_get_data(
 # LLAMA_API size_t llama_state_seq_set_data(
 #         struct llama_context * ctx,
 #         const uint8_t * src,
+#         size_t size,
 #         llama_seq_id dest_seq_id);
 @ctypes_function(
     "llama_state_seq_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], dest_seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    dest_seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence"""
     ...
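The sequence variants follow the same pattern with a seq_id appended, which makes it straightforward to snapshot one sequence's KV cache and replay it into another, e.g. to fork a conversation. A sketch under the same assumptions as above; fork_sequence is a hypothetical helper, and ctx is assumed to hold a populated sequence src_id.

import ctypes

import llama_cpp


def fork_sequence(ctx, src_id: int, dst_id: int) -> int:
    # Stage the source sequence's state, then copy it into the destination id.
    n = llama_cpp.llama_state_seq_get_size(ctx, src_id)
    buf = (ctypes.c_uint8 * n)()
    written = llama_cpp.llama_state_seq_get_data(ctx, buf, n, src_id)
    # Returns the number of bytes read back in; 0 indicates failure.
    return llama_cpp.llama_state_seq_set_data(ctx, buf, written, dst_id)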