Commit 97c38ad

updated llama.cpp
1 parent 8cc7022 commit 97c38ad

3 files changed: +29 -22 lines changed

llama-cpp-2/src/context/kv_cache.rs

Lines changed: 2 additions & 2 deletions
@@ -238,11 +238,11 @@ impl<'a> KVCacheView<'a> {
         unsafe {
             std::slice::from_raw_parts(
                 self.view.cells_sequences,
-                usize::try_from(self.view.n_cells * self.view.n_max_seq)
+                usize::try_from(self.view.n_cells * self.view.n_seq_max)
                     .expect("failed to fit n_cells * n_max_seq into usize"),
             )
         }
-        .chunks(usize::try_from(self.view.n_max_seq).expect("failed to fit n_max_seq into usize"))
+        .chunks(usize::try_from(self.view.n_seq_max).expect("failed to fit n_max_seq into usize"))
     }
 }

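The only functional change above is the rename of the view field n_max_seq to n_seq_max, tracking upstream llama.cpp; the chunking logic itself is unchanged. As a reference point (not part of the commit), here is a minimal std-only sketch of that pattern with hypothetical data: a flat buffer of n_cells * n_seq_max sequence ids, read back one cell at a time with chunks().

fn main() {
    // Hypothetical data: 3 KV cells, up to 2 sequences per cell (n_seq_max = 2).
    // In the real KVCacheView this buffer is n_cells * n_seq_max ids exposed
    // through a raw pointer, which the view code above turns into a slice first.
    let n_seq_max = 2;
    let cells_sequences: [i32; 6] = [0, 1, 0, 3, 2, 2];

    // Same pattern as the view code above: one chunk of sequence ids per cell.
    for (cell, seqs) in cells_sequences.chunks(n_seq_max).enumerate() {
        println!("cell {cell}: sequence ids {seqs:?}");
    }
}
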
llama-cpp-2/src/lib.rs

Lines changed: 6 additions & 2 deletions
@@ -61,11 +61,15 @@ pub enum LLamaCppError {
     EmbeddingError(#[from] EmbeddingsError),
 }
 
+/// There was an error while getting the chat template from a model.
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
 pub enum ChatTemplateError {
     /// gguf has no chat template
-    #[error("model has no chat template in gguf")]
-    NullReturn,
+    #[error("the model has no meta val - returned code {0}")]
+    MissingTemplate(i32),
+    /// The chat template was not valid utf8.
+    #[error(transparent)]
+    Utf8Error(#[from] std::str::Utf8Error),
 }
 
 /// Failed to Load context

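For reference (not part of the diff): the two new variants let a caller distinguish a missing template from a badly encoded one, and #[from] std::str::Utf8Error means the ? operator converts UTF-8 failures into ChatTemplateError automatically. A small caller-side sketch, assuming the enum sits at the crate root as in lib.rs above:

use llama_cpp_2::ChatTemplateError;

// Sketch: map the two variants onto human-readable messages.
fn describe(err: &ChatTemplateError) -> String {
    match err {
        // llama_model_meta_val_str returned a negative code: no template in the gguf
        ChatTemplateError::MissingTemplate(code) => {
            format!("model has no chat template in gguf (return code {code})")
        }
        // the template bytes were not valid UTF-8 (converted via #[from])
        ChatTemplateError::Utf8Error(e) => format!("chat template is not valid utf8: {e}"),
    }
}

fn main() {
    println!("{}", describe(&ChatTemplateError::MissingTemplate(-1)));
}
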
llama-cpp-2/src/model.rs

Lines changed: 21 additions & 18 deletions
@@ -277,32 +277,35 @@ impl LlamaModel {
         unsafe { llama_cpp_sys_2::llama_n_embd(self.model.as_ptr()) }
     }
 
-    /// get chat template from model
-    /// let chat_template = model.get_chat_template()?;
+    /// Get chat template from model.
     ///
-    pub fn get_chat_template(&self) -> Result<String, ChatTemplateError> {
+    /// # Errors
+    ///
+    /// * If the model has no chat template
+    /// * If the chat template is not a valid [`CString`].
+    #[allow(clippy::missing_panics_doc)] // we statically know this will not panic as
+    pub fn get_chat_template(&self, buf_size: usize) -> Result<String, ChatTemplateError> {
+
+        // longest known template is about 1200 bytes from llama.cpp
+        let chat_temp = CString::new(vec![b'*'; buf_size]).expect("no null");
+        let chat_ptr = chat_temp.into_raw();
+        let chat_name = CString::new("tokenizer.chat_template").expect("no null bytes");
+
         let chat_template: String = unsafe {
-            // longest known template is about 1200 bytes from llama.cpp
-            let chat_temp = match CString::new(Vec::<u8>::with_capacity(2048)) {
-                Ok(c) => c,
-                Err(_) => return Err(ChatTemplateError::NullReturn),
-            };
-            let chat_ptr = chat_temp.into_raw();
-            let chat_name = match CString::new("tokenizer.chat_template") {
-                Ok(c) => c,
-                Err(_) => return Err(ChatTemplateError::NullReturn),
-            };
-            llama_cpp_sys_2::llama_model_meta_val_str(
+            let ret = llama_cpp_sys_2::llama_model_meta_val_str(
                 self.model.as_ptr(),
                 chat_name.as_ptr(),
                 chat_ptr,
-                250,
+                buf_size,
            );
-            match CString::from_raw(chat_ptr).to_str() {
-                Ok(s) => s.to_string(),
-                Err(_) => return Err(ChatTemplateError::NullReturn),
+            if ret < 0 {
+                return Err(ChatTemplateError::MissingTemplate(ret));
             }
+            let template = CString::from_raw(chat_ptr).to_str()?.to_string();
+            debug_assert_eq!(usize::try_from(ret).unwrap(), template.len(), "llama.cpp guarantees that the returned int {ret} is the length of the string {} but that was not the case", template.len());
+            template
         };
+
         Ok(chat_template)
     }
 
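Taken together with the lib.rs change, the caller now supplies the metadata buffer size and gets a typed error back. A rough usage sketch, not from the commit: the model::LlamaModel path is assumed from the file layout, model loading is omitted, and 2048 is just a buffer size comfortably above the ~1200-byte templates mentioned in the code comment above.

use llama_cpp_2::{model::LlamaModel, ChatTemplateError};

// Sketch of calling the updated API; the caller now picks the buffer size.
fn print_chat_template(model: &LlamaModel) {
    match model.get_chat_template(2048) {
        Ok(template) => println!("chat template:\n{template}"),
        Err(ChatTemplateError::MissingTemplate(code)) => {
            eprintln!("model has no chat template (llama.cpp returned {code})");
        }
        Err(ChatTemplateError::Utf8Error(e)) => {
            eprintln!("chat template is not valid utf8: {e}");
        }
    }
}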