Skip to content

Commit e896487

Browse files
authored
feat: fix AHashMap type-binding error in tokenizer (#79)
1 parent 5f68f07 commit e896487

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

rust/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ crate-type = ["staticlib"]
1111
tokenizers = { version = "0.21.1", default-features = false, features = ["onig"] }
1212
serde = { version = "1.0", features = [ "derive" ] }
1313
serde_json = "1.0"
14+
ahash = "0.8.12"

rust/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// A simple C wrapper of the tokenizers library
2+
use ahash::AHashMap;
23
use serde_json::Value;
3-
use std::{collections::HashMap, str::FromStr};
4+
use std::str::FromStr;
45
use tokenizers::models::bpe::BPE;
56
use tokenizers::pre_tokenizers::byte_level::ByteLevel;
67
use tokenizers::tokenizer::Tokenizer;
@@ -11,7 +12,7 @@ pub struct TokenizerWrapper {
1112
id_to_token_result: String,
1213
}
1314

14-
pub type Vocab = HashMap<String, u32>;
15+
pub type Vocab = AHashMap<String, u32>;
1516
pub type Merges = Vec<(String, String)>;
1617

1718
#[repr(C)]
@@ -35,7 +36,7 @@ impl TokenizerWrapper {
3536
added_tokens: &str,
3637
) -> TokenizerWrapper {
3738
let vocab_json: Value = serde_json::from_str(vocab).unwrap();
38-
let mut vocab = HashMap::new();
39+
let mut vocab = ahash::AHashMap::new();
3940
match vocab_json {
4041
Value::Object(m) => {
4142
for (token, id) in m {

0 commit comments

Comments
 (0)