updated llama cpp and removed cast to mut

MarcusDunn · MarcusDunn · commit 07ab0ebd4b51 · 2024-02-29T10:12:23.000-08:00
diff --git a/llama-cpp-2/src/context/kv_cache.rs b/llama-cpp-2/src/context/kv_cache.rs
@@ -1,7 +1,8 @@
 //! utilities for working with the kv cache
 
-use std::num::NonZeroU8;
 use crate::context::LlamaContext;
+use std::ffi::c_int;
+use std::num::NonZeroU8;
 
 impl LlamaContext<'_> {
     /// Copy the cache from one sequence to another.
@@ -106,14 +107,20 @@ impl LlamaContext<'_> {
     /// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to [p1].
     /// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from [p0].
     /// * `d` - The factor to divide the positions by
-    pub fn kv_cache_seq_div(&mut self, seq_id: i32, p0: Option<u16>, p1: Option<u16>, d: NonZeroU8) {
+    pub fn kv_cache_seq_div(
+        &mut self,
+        seq_id: i32,
+        p0: Option<u16>,
+        p1: Option<u16>,
+        d: NonZeroU8,
+    ) {
         unsafe {
             llama_cpp_sys_2::llama_kv_cache_seq_div(
                 self.context.as_ptr(),
                 seq_id,
                 p0.map_or(-1, i32::from),
                 p1.map_or(-1, i32::from),
-                d.get().try_into().expect("d does not fit into a i32"),
+                c_int::from(d.get()),
             )
         }
     }
@@ -154,12 +161,12 @@ impl LlamaContext<'_> {
     ///                 if there are more sequences in a cell than this value, however they will
     ///                 not be visible in the view cells_sequences.
     pub fn new_kv_cache_view(&self, n_max_seq: i32) -> KVCacheView {
-        let view = unsafe { llama_cpp_sys_2::llama_kv_cache_view_init(self.context.as_ptr(), n_max_seq) };
+        let view =
+            unsafe { llama_cpp_sys_2::llama_kv_cache_view_init(self.context.as_ptr(), n_max_seq) };
         KVCacheView { view, ctx: self }
     }
 }
 
-
 /// Information associated with an individual cell in the KV cache view.
 #[derive(Debug)]
 pub struct KVCacheViewCell {
@@ -178,7 +185,9 @@ pub struct KVCacheView<'a> {
 impl<'a> KVCacheView<'a> {
     /// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
     pub fn update(&mut self) {
-        unsafe { llama_cpp_sys_2::llama_kv_cache_view_update(self.ctx.context.as_ptr(), &mut self.view) }
+        unsafe {
+            llama_cpp_sys_2::llama_kv_cache_view_update(self.ctx.context.as_ptr(), &mut self.view)
+        }
     }
 
     /// Number of KV cache cells. This will be the same as the context size.
@@ -210,16 +219,27 @@ impl<'a> KVCacheView<'a> {
     }
 
     /// Information for individual cells.
-    pub fn cells(&self) -> impl Iterator<Item=KVCacheViewCell> {
-        unsafe { std::slice::from_raw_parts(self.view.cells, self.view.n_cells.try_into().unwrap()) }
-            .iter()
-            .map(|&cell| KVCacheViewCell { pos: cell.pos })
+    pub fn cells(&self) -> impl Iterator<Item = KVCacheViewCell> {
+        unsafe {
+            std::slice::from_raw_parts(
+                self.view.cells,
+                usize::try_from(self.view.n_cells).expect("failed to fit n_cells into usize"),
+            )
+        }
+        .iter()
+        .map(|&cell| KVCacheViewCell { pos: cell.pos })
     }
 
     /// The sequences for each cell. There will be n_max_seq items per cell.
-    pub fn cells_sequences(&self) -> impl Iterator<Item=&[llama_cpp_sys_2::llama_seq_id]> {
-        unsafe { std::slice::from_raw_parts(self.view.cells_sequences, (self.view.n_cells * self.view.n_max_seq).try_into().unwrap()) }
-            .chunks(self.view.n_max_seq.try_into().unwrap())
+    pub fn cells_sequences(&self) -> impl Iterator<Item = &[llama_cpp_sys_2::llama_seq_id]> {
+        unsafe {
+            std::slice::from_raw_parts(
+                self.view.cells_sequences,
+                usize::try_from(self.view.n_cells * self.view.n_max_seq)
+                    .expect("failed to fit n_cells * n_max_seq into usize"),
+            )
+        }
+        .chunks(usize::try_from(self.view.n_max_seq).expect("failed to fit n_max_seq into usize"))
     }
 }
 
@@ -229,4 +249,4 @@ impl<'a> Drop for KVCacheView<'a> {
             llama_cpp_sys_2::llama_kv_cache_view_free(&mut self.view);
         }
     }
-}
+}
diff --git a/llama-cpp-2/src/context/session.rs b/llama-cpp-2/src/context/session.rs
@@ -150,9 +150,7 @@ impl LlamaContext<'_> {
     /// Returns the number of bytes read
     pub unsafe fn set_state_data(&mut self, src: &[u8]) -> usize {
         unsafe {
-            // we don't really need a mutable pointer for `src` -- this is a llama-cpp lapse,
-            // so we cast away the constness
-            llama_cpp_sys_2::llama_set_state_data(self.context.as_ptr(), src.as_ptr() as *mut u8)
+            llama_cpp_sys_2::llama_set_state_data(self.context.as_ptr(), src.as_ptr())
         }
     }
 }
diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp
@@ -1 +1 @@
-Subproject commit 87c91c07663b707e831c59ec373b5e665ff9d64a
+Subproject commit d5ab29757ebc59a30f03e408294ec20628a6374e

Original file line number	Diff line number	Diff line change
`@@ -150,9 +150,7 @@ impl LlamaContext<'_> {`
`150`	`150`	`/// Returns the number of bytes read`
`151`	`151`	`pub unsafe fn set_state_data(&mut self, src: &[u8]) -> usize {`
`152`	`152`	`unsafe {`
`153`		- // we don't really need a mutable pointer for `src` -- this is a llama-cpp lapse,
`154`		`- // so we cast away the constness`
`155`		`- llama_cpp_sys_2::llama_set_state_data(self.context.as_ptr(), src.as_ptr() as *mut u8)`
	`153`	`+ llama_cpp_sys_2::llama_set_state_data(self.context.as_ptr(), src.as_ptr())`
`156`	`154`	`}`
`157`	`155`	`}`
`158`	`156`	`}`