
Commit 047898c

Merge pull request #202 from utilityai/update-llama-cpp-2024-03-19
Updated llama-cpp (bot)
2 parents 653baf7 + 970848a commit 047898c

6 files changed: +32 -25 lines changed


llama-cpp-2/src/context/kv_cache.rs

Lines changed: 2 additions & 2 deletions
@@ -238,11 +238,11 @@ impl<'a> KVCacheView<'a> {
         unsafe {
             std::slice::from_raw_parts(
                 self.view.cells_sequences,
-                usize::try_from(self.view.n_cells * self.view.n_max_seq)
+                usize::try_from(self.view.n_cells * self.view.n_seq_max)
                     .expect("failed to fit n_cells * n_max_seq into usize"),
             )
         }
-        .chunks(usize::try_from(self.view.n_max_seq).expect("failed to fit n_max_seq into usize"))
+        .chunks(usize::try_from(self.view.n_seq_max).expect("failed to fit n_max_seq into usize"))
     }
 }
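
For reference, this rename just tracks llama.cpp's llama_kv_cache_view field rename (n_max_seq -> n_seq_max); the behaviour is unchanged. A rough sketch of how the chunked view might be consumed downstream -- the accessor names new_kv_cache_view and cells_sequences are assumptions, not taken from this diff:

    use llama_cpp_2::context::LlamaContext;

    // Hypothetical sketch: print which sequence ids occupy each KV cache cell.
    // `new_kv_cache_view` and `cells_sequences` are assumed accessor names.
    fn dump_cell_sequences(ctx: &LlamaContext) {
        let view = ctx.new_kv_cache_view(1); // track up to n_seq_max = 1 sequence per cell
        for (cell, seqs) in view.cells_sequences().enumerate() {
            // each chunk has length n_seq_max; negative ids mean an unused slot
            println!("cell {cell}: sequences {seqs:?}");
        }
    }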

llama-cpp-2/src/context/params.rs

Lines changed: 1 addition & 1 deletion
@@ -161,7 +161,7 @@ impl LlamaContextParams {
     /// ```rust
     /// use llama_cpp_2::context::params::LlamaContextParams;
     /// let params = LlamaContextParams::default();
-    /// assert_eq!(params.n_batch(), 512);
+    /// assert_eq!(params.n_batch(), 2048);
     /// ```
     #[must_use]
     pub fn n_batch(&self) -> u32 {
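
The doctest now reflects the crate's default batch size, which follows upstream llama.cpp's new n_batch default of 2048. A small sketch of pinning it explicitly instead, assuming the crate's with_n_batch builder method:

    use llama_cpp_2::context::params::LlamaContextParams;

    fn main() {
        // Pin the logical batch size rather than relying on the new 2048 default.
        let params = LlamaContextParams::default().with_n_batch(512);
        assert_eq!(params.n_batch(), 512);
    }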

llama-cpp-2/src/lib.rs

Lines changed: 6 additions & 2 deletions
@@ -61,11 +61,15 @@ pub enum LLamaCppError {
     EmbeddingError(#[from] EmbeddingsError),
 }
 
+/// There was an error while getting the chat template from a model.
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
 pub enum ChatTemplateError {
     /// gguf has no chat template
-    #[error("model has no chat template in gguf")]
-    NullReturn,
+    #[error("the model has no meta val - returned code {0}")]
+    MissingTemplate(i32),
+    /// The chat template was not valid utf8.
+    #[error(transparent)]
+    Utf8Error(#[from] std::str::Utf8Error),
 }
 
 /// Failed to Load context
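
Callers can now tell a missing template apart from a malformed one. A minimal sketch of matching the new variants (not part of this diff):

    use llama_cpp_2::ChatTemplateError;

    // Map the two failure modes to human-readable messages.
    fn describe(err: &ChatTemplateError) -> String {
        match err {
            // llama_model_meta_val_str returned a negative code: no template in the gguf metadata
            ChatTemplateError::MissingTemplate(code) => format!("no chat template (code {code})"),
            // the metadata value was present but was not valid UTF-8
            ChatTemplateError::Utf8Error(e) => format!("chat template is not valid utf8: {e}"),
        }
    }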

llama-cpp-2/src/model.rs

Lines changed: 21 additions & 18 deletions
@@ -277,32 +277,35 @@ impl LlamaModel {
         unsafe { llama_cpp_sys_2::llama_n_embd(self.model.as_ptr()) }
     }
 
-    /// get chat template from model
-    /// let chat_template = model.get_chat_template()?;
+    /// Get chat template from model.
     ///
-    pub fn get_chat_template(&self) -> Result<String, ChatTemplateError> {
+    /// # Errors
+    ///
+    /// * If the model has no chat template
+    /// * If the chat template is not a valid [`CString`].
+    #[allow(clippy::missing_panics_doc)] // we statically know this will not panic as
+    pub fn get_chat_template(&self, buf_size: usize) -> Result<String, ChatTemplateError> {
+
+        // longest known template is about 1200 bytes from llama.cpp
+        let chat_temp = CString::new(vec![b'*'; buf_size]).expect("no null");
+        let chat_ptr = chat_temp.into_raw();
+        let chat_name = CString::new("tokenizer.chat_template").expect("no null bytes");
+
         let chat_template: String = unsafe {
-            // longest known template is about 1200 bytes from llama.cpp
-            let chat_temp = match CString::new(Vec::<u8>::with_capacity(2048)) {
-                Ok(c) => c,
-                Err(_) => return Err(ChatTemplateError::NullReturn),
-            };
-            let chat_ptr = chat_temp.into_raw();
-            let chat_name = match CString::new("tokenizer.chat_template") {
-                Ok(c) => c,
-                Err(_) => return Err(ChatTemplateError::NullReturn),
-            };
-            llama_cpp_sys_2::llama_model_meta_val_str(
+            let ret = llama_cpp_sys_2::llama_model_meta_val_str(
                 self.model.as_ptr(),
                 chat_name.as_ptr(),
                 chat_ptr,
-                250,
+                buf_size,
             );
-            match CString::from_raw(chat_ptr).to_str() {
-                Ok(s) => s.to_string(),
-                Err(_) => return Err(ChatTemplateError::NullReturn),
+            if ret < 0 {
+                return Err(ChatTemplateError::MissingTemplate(ret));
             }
+            let template = CString::from_raw(chat_ptr).to_str()?.to_string();
+            debug_assert_eq!(usize::try_from(ret).unwrap(), template.len(), "llama.cpp guarantees that the returned int {ret} is the length of the string {} but that was not the case", template.len());
+            template
         };
+
         Ok(chat_template)
     }
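
With the new signature the caller chooses the scratch-buffer size and can react to the distinct error variants. A hedged usage sketch, assuming an already-loaded model:

    use llama_cpp_2::{model::LlamaModel, ChatTemplateError};

    // `model` is assumed to be a LlamaModel that has already been loaded.
    fn print_chat_template(model: &LlamaModel) {
        // the diff's comment notes the longest known template is about 1200 bytes,
        // so a 2048-byte scratch buffer leaves headroom
        match model.get_chat_template(2048) {
            Ok(template) => println!("chat template:\n{template}"),
            Err(ChatTemplateError::MissingTemplate(code)) => eprintln!("no chat template (code {code})"),
            Err(ChatTemplateError::Utf8Error(e)) => eprintln!("chat template is not valid utf8: {e}"),
        }
    }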

llama-cpp-sys-2/build.rs

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ fn metal_hack(build: &mut cc::Build) {
     let ggml_metal =
         std::fs::read_to_string(GGML_METAL_PATH).expect("Could not read ggml-metal.m");
 
-    let needle = r#"NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error];"#;
+    let needle = r#"NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];"#;
     if !ggml_metal.contains(needle) {
         panic!("ggml-metal.m does not contain the needle to be replaced; the patching logic needs to be reinvestigated. Contact a `llama-cpp-sys-2` developer!");
     }
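
The new needle only tracks an upstream variable rename in ggml-metal.m (sourcePath -> path_source); the patching approach itself is unchanged. A simplified sketch of that approach -- the helper name and the embedded_shader_literal parameter are illustrative, not the literal build.rs code:

    // Simplified sketch: verify the needle is present, then splice in the shader
    // source so it is embedded at compile time instead of loaded from disk.
    fn patch_ggml_metal(ggml_metal: &str, embedded_shader_literal: &str) -> String {
        let needle = r#"NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];"#;
        assert!(
            ggml_metal.contains(needle),
            "needle missing; the patching logic needs reinvestigating"
        );
        ggml_metal.replace(
            needle,
            &format!(r#"NSString * src = @"{embedded_shader_literal}";"#),
        )
    }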

llama-cpp-sys-2/llama.cpp (submodule update; no text diff shown)
