@@ -28,7 +28,7 @@ impl LlamaContext<'_> {
2828 let p0 = p0. map_or ( -1 , i32:: from) ;
2929 let p1 = p1. map_or ( -1 , i32:: from) ;
3030 unsafe {
31- llama_cpp_sys_2:: llama_kv_cache_seq_cp ( self . context . as_ptr ( ) , src, dest, p0, p1)
31+ llama_cpp_sys_2:: llama_kv_cache_seq_cp ( self . context . as_ptr ( ) , src, dest, p0, p1) ;
3232 }
3333 }
3434
@@ -48,6 +48,7 @@ impl LlamaContext<'_> {
4848 }
4949
5050 /// Returns the number of used KV cells, i.e. cells that have at least one sequence assigned to them.
51+ #[ must_use]
5152 pub fn get_kv_cache_used_cells ( & self ) -> i32 {
5253 unsafe { llama_cpp_sys_2:: llama_get_kv_cache_used_cells ( self . context . as_ptr ( ) ) }
5354 }
@@ -68,8 +69,8 @@ impl LlamaContext<'_> {
6869
6970 /// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
7071 /// If the KV cache is RoPEd, the KV data is updated accordingly:
71- /// - lazily on next llama_decode()
72- /// - explicitly with llama_kv_cache_update()
72+ /// - lazily on next [`LlamaContext::decode`]
73+ /// - explicitly with [`Self::kv_cache_update`]
7374 ///
7475 /// # Parameters
7576 ///
@@ -81,14 +82,14 @@ impl LlamaContext<'_> {
8182 let p0 = p0. map_or ( -1 , i32:: from) ;
8283 let p1 = p1. map_or ( -1 , i32:: from) ;
8384 unsafe {
84- llama_cpp_sys_2:: llama_kv_cache_seq_add ( self . context . as_ptr ( ) , seq_id, p0, p1, delta)
85+ llama_cpp_sys_2:: llama_kv_cache_seq_add ( self . context . as_ptr ( ) , seq_id, p0, p1, delta) ;
8586 }
8687 }
8788
8889 /// Integer division of the positions by factor of `d > 1`
89- /// If the KV cache is RoPEd, the KV data is updated accordingly:
90- /// - lazily on next llama_decode()
91- /// - explicitly with llama_kv_cache_update()
90+ /// If the KV cache is `RoPEd`, the KV data is updated accordingly:
91+ /// - lazily on next [`LlamaContext::decode`]
92+ /// - explicitly with [`Self::kv_cache_update`]
9293 ///
9394 /// # Parameters
9495 ///
@@ -114,14 +115,15 @@ impl LlamaContext<'_> {
114115 /// # Parameters
115116 ///
116117 /// * `seq_id` - The id of the sequence whose maximum position is requested
118+ #[ must_use]
117119 pub fn kv_cache_seq_pos_max ( & self , seq_id : i32 ) -> i32 {
118120 unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_pos_max ( self . context . as_ptr ( ) , seq_id) }
119121 }
120122
121123 /// Defragment the KV cache (forwards to `llama_cpp_sys_2::llama_kv_cache_defrag`).
122124 /// This will be applied:
123- /// - lazily on next llama_decode()
124- /// - explicitly with llama_kv_cache_update()
125+ /// - lazily on next [`LlamaContext::decode`]
126+ /// - explicitly with [`Self::kv_cache_update`]
125127 pub fn kv_cache_defrag ( & mut self ) {
126128 unsafe { llama_cpp_sys_2:: llama_kv_cache_defrag ( self . context . as_ptr ( ) ) }
127129 }
@@ -133,6 +135,7 @@ impl LlamaContext<'_> {
133135
134136 /// Returns the number of tokens in the KV cache (slow, use only for debugging).
135137 /// If a KV cell has multiple sequences assigned to it, it will be counted multiple times.
138+ #[ must_use]
136139 pub fn get_kv_cache_token_count ( & self ) -> i32 {
137140 unsafe { llama_cpp_sys_2:: llama_get_kv_cache_token_count ( self . context . as_ptr ( ) ) }
138141 }
@@ -143,7 +146,8 @@ impl LlamaContext<'_> {
143146 ///
144147 /// * `n_max_seq` - Maximum number of sequences that can exist in a cell. It's not an error
145148 /// if there are more sequences in a cell than this value, however they will
146- /// not be visible in the view cells_sequences.
149+ /// not be visible in the view `cells_sequences`.
150+ #[ must_use]
147151 pub fn new_kv_cache_view ( & self , n_max_seq : i32 ) -> KVCacheView {
148152 let view =
149153 unsafe { llama_cpp_sys_2:: llama_kv_cache_view_init ( self . context . as_ptr ( ) , n_max_seq) } ;
@@ -170,39 +174,48 @@ impl<'a> KVCacheView<'a> {
170174 /// Updates this KV cache view in place with the current state of the KV cache (use only for debugging purposes).
171175 pub fn update ( & mut self ) {
172176 unsafe {
173- llama_cpp_sys_2:: llama_kv_cache_view_update ( self . ctx . context . as_ptr ( ) , & mut self . view )
177+ llama_cpp_sys_2:: llama_kv_cache_view_update ( self . ctx . context . as_ptr ( ) , & mut self . view ) ;
174178 }
175179 }
176180
177181 /// Number of KV cache cells, as recorded in the view (`view.n_cells`). This will be the same as the context size.
182+ #[ must_use]
178183 pub fn n_cells ( & self ) -> i32 {
179184 self . view . n_cells
180185 }
181186
182187 /// Number of tokens in the cache, as recorded in the view. For example, if there are two populated
183188 /// cells, the first with 1 sequence id in it and the second with 2 sequence
184189 /// ids, then you'll have 3 tokens.
190+ #[ must_use]
185191 pub fn token_count ( & self ) -> i32 {
186192 self . view . token_count
187193 }
188194
189195 /// Number of populated cache cells, as recorded in the view (`view.used_cells`).
196+ #[ must_use]
190197 pub fn used_cells ( & self ) -> i32 {
191198 self . view . used_cells
192199 }
193200
194201 /// Maximum number of contiguous empty slots in the cache, as recorded in the view (`view.max_contiguous`).
202+ #[ must_use]
195203 pub fn max_contiguous ( & self ) -> i32 {
196204 self . view . max_contiguous
197205 }
198206
199- /// Index to the start of the max_contiguous slot range. Can be negative
207+ /// Index to the start of the `max_contiguous` slot range. Can be negative
200208 /// when the cache is full.
209+ #[ must_use]
201210 pub fn max_contiguous_idx ( & self ) -> i32 {
202211 self . view . max_contiguous_idx
203212 }
204213
205214 /// Information for individual cells.
215+ ///
216+ /// # Panics
217+ ///
218+ /// - if `n_cells` does not fit into `usize`.
206219 pub fn cells ( & self ) -> impl Iterator < Item = KVCacheViewCell > {
207220 unsafe {
208221 std:: slice:: from_raw_parts (
@@ -214,7 +227,12 @@ impl<'a> KVCacheView<'a> {
214227 . map ( |& cell| KVCacheViewCell { pos : cell. pos } )
215228 }
216229
217- /// The sequences for each cell. There will be n_max_seq items per cell.
230+ /// The sequences for each cell. There will be `n_max_seq` items per cell.
231+ ///
232+ /// # Panics
233+ ///
234+ /// - if `n_cells * n_max_seq` does not fit into `usize`.
235+ /// - if `n_max_seq` does not fit into `usize`.
218236 pub fn cells_sequences ( & self ) -> impl Iterator < Item = & [ llama_cpp_sys_2:: llama_seq_id ] > {
219237 unsafe {
220238 std:: slice:: from_raw_parts (
0 commit comments