@@ -8,10 +8,11 @@ use chat_template::ChatTemplate;
88use session:: { Dispatcher , Generator } ;
99use std:: {
1010 fmt:: { self , Debug } ,
11+ fs:: File ,
1112 path:: Path ,
1213 sync:: Arc ,
1314} ;
14- use tokenizer:: { BPECommonNormalizer , Normalizer , Tokenize , Tokenizer , VocabTxt , BPE } ;
15+ use tokenizer:: { BPECommonNormalizer , Bpe , Normalizer , Tokeneer , Tokenize , VocabTxt } ;
1516use tokio:: task:: JoinHandle ;
1617
1718pub use chat_template:: Message ;
@@ -152,10 +153,8 @@ fn template(model_dir: impl AsRef<Path>) -> ChatTemplate {
152153
153154fn normalizer ( model_dir : impl AsRef < Path > ) -> Box < dyn Normalizer + Send + Sync > {
154155 use std:: io:: ErrorKind :: NotFound ;
155- match BPE :: from_tokenizer_model ( model_dir. as_ref ( ) . join ( "tokenizer.model" ) ) {
156- Ok ( _) => return Box :: new ( BPECommonNormalizer { } ) ,
157- Err ( e) if e. kind ( ) == NotFound => { }
158- Err ( e) => panic ! ( "{e:?}" ) ,
156+ if model_dir. as_ref ( ) . join ( "tokenizer.model" ) . is_file ( ) {
157+ return Box :: new ( BPECommonNormalizer { } ) ;
159158 }
160159 match VocabTxt :: from_txt_file ( model_dir. as_ref ( ) . join ( "vocabs.txt" ) ) {
161160 Ok ( _) => return Box :: new ( ( ) ) ,
@@ -167,8 +166,10 @@ fn normalizer(model_dir: impl AsRef<Path>) -> Box<dyn Normalizer + Send + Sync>
167166
168167fn tokenizer ( model_dir : impl AsRef < Path > ) -> Box < dyn Tokenize + Send + Sync > {
169168 use std:: io:: ErrorKind :: NotFound ;
170- match BPE :: from_tokenizer_model ( model_dir. as_ref ( ) . join ( "tokenizer.model" ) ) {
171- Ok ( bpe) => return Box :: new ( Tokenizer :: new ( bpe) ) ,
169+ let file = File :: open ( model_dir. as_ref ( ) . join ( "tokenizer.model" ) )
170+ . and_then ( |f| unsafe { memmap2:: Mmap :: map ( & f) } ) ;
171+ match file {
172+ Ok ( f) => return Box :: new ( Tokeneer :: new ( Bpe :: from_tokenizer_model ( & f) ) ) ,
172173 Err ( e) if e. kind ( ) == NotFound => { }
173174 Err ( e) => panic ! ( "{e:?}" ) ,
174175 }
0 commit comments