@@ -13,9 +13,8 @@ use crate::model::params::LlamaModelParams;
13
13
use crate :: token:: LlamaToken ;
14
14
use crate :: token_type:: { LlamaTokenAttr , LlamaTokenAttrs } ;
15
15
use crate :: {
16
- ApplyChatTemplateError , ChatTemplateError , InternalChatTemplateError , LlamaContextLoadError ,
17
- LlamaLoraAdapterInitError , LlamaModelLoadError , NewLlamaChatMessageError , StringToTokenError ,
18
- TokenToStringError ,
16
+ ApplyChatTemplateError , ChatTemplateError , LlamaContextLoadError , LlamaLoraAdapterInitError ,
17
+ LlamaModelLoadError , NewLlamaChatMessageError , StringToTokenError , TokenToStringError ,
19
18
} ;
20
19
21
20
pub mod params;
@@ -36,7 +35,7 @@ pub struct LlamaLoraAdapter {
36
35
pub ( crate ) lora_adapter : NonNull < llama_cpp_sys_2:: llama_adapter_lora > ,
37
36
}
38
37
39
- /// A performance-friendly wrapper around [LlamaModel::get_chat_template ] which is then
38
+ /// A performance-friendly wrapper around [LlamaModel::chat_template ] which is then
40
39
/// fed into [LlamaModel::apply_chat_template] to convert a list of messages into an LLM
41
40
/// prompt. Internally the template is stored as a CString to avoid round-trip conversions
42
41
/// within the FFI.
@@ -506,83 +505,38 @@ impl LlamaModel {
506
505
}
507
506
}
508
507
509
- fn get_chat_template_impl (
510
- & self ,
511
- capacity : usize ,
512
- ) -> Result < LlamaChatTemplate , InternalChatTemplateError > {
513
- // longest known template is about 1200 bytes from llama.cpp
514
- // TODO: Once MaybeUninit support is better, this can be converted to use that instead of dummy initializing such a large array.
515
- let mut chat_temp = vec ! [ b'*' as u8 ; capacity] ;
516
- let chat_name =
517
- CStr :: from_bytes_with_nul ( b"tokenizer.chat_template\0 " ) . expect ( "should have null byte" ) ;
518
-
519
- let ret = unsafe {
520
- llama_cpp_sys_2:: llama_model_meta_val_str (
521
- self . model . as_ptr ( ) ,
522
- chat_name. as_ptr ( ) ,
523
- chat_temp. as_mut_ptr ( ) as * mut c_char ,
524
- chat_temp. len ( ) ,
525
- )
526
- } ;
527
-
528
- if ret < 0 {
529
- return Err ( InternalChatTemplateError :: Permanent (
530
- ChatTemplateError :: MissingTemplate ( ret) ,
531
- ) ) ;
532
- }
533
-
534
- let returned_len = ret as usize ;
535
-
536
- if ret as usize >= capacity {
537
- // >= is important because if the returned length is equal to capacity, it means we're missing a trailing null
538
- // since the returned length doesn't count the trailing null.
539
- return Err ( InternalChatTemplateError :: RetryWithLargerBuffer (
540
- returned_len,
541
- ) ) ;
542
- }
543
-
544
- assert_eq ! (
545
- chat_temp. get( returned_len) ,
546
- Some ( & 0 ) ,
547
- "should end with null byte"
548
- ) ;
549
-
550
- chat_temp. resize ( returned_len + 1 , 0 ) ;
551
-
552
- Ok ( LlamaChatTemplate ( unsafe {
553
- CString :: from_vec_with_nul_unchecked ( chat_temp)
554
- } ) )
555
- }
556
-
557
- /// Get chat template from model. If this fails, you may either want to fail to chat or pick the
558
- /// specific shortcode that llama.cpp supports templates it has baked-in directly into its codebase
559
- /// as fallbacks when the model doesn't contain. NOTE: If you don't specify a chat template, then
560
- /// it uses chatml by default which is unlikely to actually be the correct template for your model
561
- /// and you'll get weird results back.
508
+ /// Get chat template from model by name. If the name parameter is None, the default chat template will be returned.
562
509
///
563
510
/// You supply this into [Self::apply_chat_template] to get back a string with the appropriate template
564
511
/// substitution applied to convert a list of messages into a prompt the LLM can use to complete
565
512
/// the chat.
566
513
///
514
+ /// You could also use an external jinja parser, like [minijinja](https://github.com/mitsuhiko/minijinja),
515
+ /// to parse jinja templates not supported by the llama.cpp template engine.
516
+ ///
567
517
/// # Errors
568
518
///
569
- /// * If the model has no chat template
519
+ /// * If the model has no chat template by that name
570
520
/// * If the chat template is not a valid [`CString`].
571
- #[ allow( clippy:: missing_panics_doc) ] // we statically know this will not panic as
572
- pub fn get_chat_template ( & self ) -> Result < LlamaChatTemplate , ChatTemplateError > {
573
- // Typical chat templates are quite small. Let's start with a small allocation likely to succeed.
574
- // Ideally the performance of this would be negligible but uninitialized arrays in Rust are currently
575
- // still not well supported so we end up initializing the chat template buffer twice. One idea might
576
- // be to use a very small value here that will likely fail (like 0 or 1) and then use that to initialize.
577
- // Not sure which approach is the most optimal but in practice this should work well.
578
- match self . get_chat_template_impl ( 200 ) {
579
- Ok ( t) => Ok ( t) ,
580
- Err ( InternalChatTemplateError :: Permanent ( e) ) => Err ( e) ,
581
- Err ( InternalChatTemplateError :: RetryWithLargerBuffer ( actual_len) ) => match self . get_chat_template_impl ( actual_len + 1 ) {
582
- Ok ( t) => Ok ( t) ,
583
- Err ( InternalChatTemplateError :: Permanent ( e) ) => Err ( e) ,
584
- Err ( InternalChatTemplateError :: RetryWithLargerBuffer ( unexpected_len) ) => panic ! ( "Was told that the template length was {actual_len} but now it's {unexpected_len}" ) ,
585
- }
521
+ pub fn chat_template (
522
+ & self ,
523
+ name : Option < & str > ,
524
+ ) -> Result < LlamaChatTemplate , ChatTemplateError > {
525
+ let name_cstr = name. map ( CString :: new) ;
526
+ let name_ptr = match name_cstr {
527
+ Some ( Ok ( name) ) => name. as_ptr ( ) ,
528
+ _ => std:: ptr:: null ( ) ,
529
+ } ;
530
+ let result =
531
+ unsafe { llama_cpp_sys_2:: llama_model_chat_template ( self . model . as_ptr ( ) , name_ptr) } ;
532
+
533
+ // Convert result to Rust String if not null
534
+ if result. is_null ( ) {
535
+ Err ( ChatTemplateError :: MissingTemplate )
536
+ } else {
537
+ let chat_template_cstr = unsafe { CStr :: from_ptr ( result) } ;
538
+ let chat_template = CString :: new ( chat_template_cstr. to_bytes ( ) ) ?;
539
+ Ok ( LlamaChatTemplate ( chat_template) )
586
540
}
587
541
}
588
542
@@ -672,7 +626,7 @@ impl LlamaModel {
672
626
/// use "chatml", then just do `LlamaChatTemplate::new("chatml")` or any other model name or template
673
627
/// string.
674
628
///
675
- /// Use [Self::get_chat_template ] to retrieve the template baked into the model (this is the preferred
629
+ /// Use [Self::chat_template ] to retrieve the template baked into the model (this is the preferred
676
630
/// mechanism as using the wrong chat template can result in really unexpected responses from the LLM).
677
631
///
678
632
/// You probably want to set `add_ass` to true so that the generated template string ends with the
0 commit comments