
Commit 8447c4d

Merge pull request #194 from bruceunx/feat-chat-template
added feature get chat template from gguf model
2 parents fae0864 + c7967a7 commit 8447c4d

File tree

2 files changed: +43 -1 lines changed


llama-cpp-2/src/lib.rs

Lines changed: 10 additions & 0 deletions
@@ -41,6 +41,9 @@ pub enum LLamaCppError {
     /// is idempotent.
     #[error("BackendAlreadyInitialized")]
     BackendAlreadyInitialized,
+    /// There was an error while getting the chat template from the model.
+    #[error("{0}")]
+    ChatTemplateError(#[from] ChatTemplateError),
     /// There was an error while decoding a batch.
     #[error("{0}")]
     DecodeError(#[from] DecodeError),
@@ -58,6 +61,13 @@ pub enum LLamaCppError {
     EmbeddingError(#[from] EmbeddingsError),
 }
 
+#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+pub enum ChatTemplateError {
+    /// gguf has no chat template
+    #[error("model has no chat template in gguf")]
+    NullReturn,
+}
+
 /// Failed to Load context
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
 pub enum LlamaContextLoadError {
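
Because the new ChatTemplateError variant on LLamaCppError uses #[from], thiserror generates a From impl, so the ? operator converts the error automatically. A minimal sketch of calling code, assuming the crate is used as llama_cpp_2 and a loaded LlamaModel is already available (only get_chat_template and the error types come from this commit; the read_template helper is hypothetical):

use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::LLamaCppError;

// Hypothetical helper: `?` converts ChatTemplateError into LLamaCppError
// through the `From` impl generated by the `#[from]` attribute added in this commit.
fn read_template(model: &LlamaModel) -> Result<String, LLamaCppError> {
    let template = model.get_chat_template()?;
    Ok(template)
}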

llama-cpp-2/src/model.rs

Lines changed: 33 additions & 1 deletion
@@ -10,7 +10,10 @@ use crate::llama_backend::LlamaBackend;
 use crate::model::params::LlamaModelParams;
 use crate::token::LlamaToken;
 use crate::token_type::LlamaTokenType;
-use crate::{LlamaContextLoadError, LlamaModelLoadError, StringToTokenError, TokenToStringError};
+use crate::{
+    ChatTemplateError, LlamaContextLoadError, LlamaModelLoadError, StringToTokenError,
+    TokenToStringError,
+};
 
 pub mod params;
 
@@ -274,6 +277,35 @@ impl LlamaModel {
         unsafe { llama_cpp_sys_2::llama_n_embd(self.model.as_ptr()) }
     }
 
+    /// get chat template from model
+    /// let chat_template = model.get_chat_template()?;
+    ///
+    pub fn get_chat_template(&self) -> Result<String, ChatTemplateError> {
+        let chat_template: String = unsafe {
+            // longest known template is about 1200 bytes from llama.cpp
+            let chat_temp = match CString::new(Vec::<u8>::with_capacity(2048)) {
+                Ok(c) => c,
+                Err(_) => return Err(ChatTemplateError::NullReturn),
+            };
+            let chat_ptr = chat_temp.into_raw();
+            let chat_name = match CString::new("tokenizer.chat_template") {
+                Ok(c) => c,
+                Err(_) => return Err(ChatTemplateError::NullReturn),
+            };
+            llama_cpp_sys_2::llama_model_meta_val_str(
+                self.model.as_ptr(),
+                chat_name.as_ptr(),
+                chat_ptr,
+                250,
+            );
+            match CString::from_raw(chat_ptr).to_str() {
+                Ok(s) => s.to_string(),
+                Err(_) => return Err(ChatTemplateError::NullReturn),
+            }
+        };
+        Ok(chat_template)
+    }
+
     /// loads a model from a file.
     ///
     /// # Errors
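
A minimal usage sketch of the new method, assuming a LlamaModel has already been loaded into a variable named model (loading code omitted; only get_chat_template and ChatTemplateError::NullReturn come from this commit):

use llama_cpp_2::ChatTemplateError;

// `model` is an already-loaded llama_cpp_2::model::LlamaModel.
match model.get_chat_template() {
    Ok(template) => println!("chat template from gguf:\n{template}"),
    Err(ChatTemplateError::NullReturn) => eprintln!("model has no chat template in gguf"),
}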

0 commit comments