File tree Expand file tree Collapse file tree 2 files changed +5
-3
lines changed
Expand file tree Collapse file tree 2 files changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -11,3 +11,4 @@ crate-type = ["staticlib"]
1111tokenizers = { version = " 0.21.1" , default-features = false , features = [" onig" ] }
1212serde = { version = " 1.0" , features = [ " derive" ] }
1313serde_json = " 1.0"
14+ ahash = " 0.8.12"
Original file line number Diff line number Diff line change 11// A simple C wrapper of tokenizer library
2+ use ahash:: AHashMap ;
23use serde_json:: Value ;
3- use std:: { collections :: HashMap , str:: FromStr } ;
4+ use std:: str:: FromStr ;
45use tokenizers:: models:: bpe:: BPE ;
56use tokenizers:: pre_tokenizers:: byte_level:: ByteLevel ;
67use tokenizers:: tokenizer:: Tokenizer ;
@@ -11,7 +12,7 @@ pub struct TokenizerWrapper {
1112 id_to_token_result : String ,
1213}
1314
14- pub type Vocab = HashMap < String , u32 > ;
15+ pub type Vocab = AHashMap < String , u32 > ;
1516pub type Merges = Vec < ( String , String ) > ;
1617
1718#[ repr( C ) ]
@@ -35,7 +36,7 @@ impl TokenizerWrapper {
3536 added_tokens : & str ,
3637 ) -> TokenizerWrapper {
3738 let vocab_json: Value = serde_json:: from_str ( vocab) . unwrap ( ) ;
38- let mut vocab = HashMap :: new ( ) ;
39+ let mut vocab = ahash :: AHashMap :: new ( ) ;
3940 match vocab_json {
4041 Value :: Object ( m) => {
4142 for ( token, id) in m {
You can’t perform that action at this time.
0 commit comments