File tree Expand file tree Collapse file tree 3 files changed +8
-7
lines changed Expand file tree Collapse file tree 3 files changed +8
-7
lines changed Original file line number Diff line number Diff line change @@ -162,11 +162,11 @@ fn hash_current_thread() -> usize {
162
162
u64:: from ( x) as usize
163
163
}
164
164
165
- const MAX_NUM_THREADS : usize = 128 ;
165
+ const MAX_NUM_THREADS : usize = 8 ;
166
166
167
167
#[ derive( Debug ) ]
168
168
pub struct CoreBPE {
169
- encoder : HashMap < Vec < u8 > , usize > ,
169
+ encoder : Arc < HashMap < Vec < u8 > , usize > > ,
170
170
special_tokens_encoder : HashMap < String , usize > ,
171
171
decoder : HashMap < usize , Vec < u8 > > ,
172
172
special_tokens_decoder : HashMap < usize , Vec < u8 > > ,
@@ -429,7 +429,7 @@ impl CoreBPE {
429
429
430
430
impl CoreBPE {
431
431
pub fn new (
432
- encoder : HashMap < Vec < u8 > , usize > ,
432
+ encoder : Arc < HashMap < Vec < u8 > , usize > > ,
433
433
special_tokens_encoder : HashMap < String , usize > ,
434
434
pattern : & str ,
435
435
) -> Result < Self , fancy_regex:: Error > {
Original file line number Diff line number Diff line change @@ -14,7 +14,7 @@ pub struct Encoding {
14
14
/// The regular expression pattern used to split text into pieces.
15
15
pat_str : String ,
16
16
/// The map from mergeable byte sequences to their ranks.
17
- mergeable_ranks : HashMap < Vec < u8 > , usize > ,
17
+ mergeable_ranks : Arc < HashMap < Vec < u8 > , usize > > ,
18
18
/// The maximum length of the keys in `mergeable_ranks`.
19
19
mergeable_ranks_max_key_len : usize ,
20
20
/// All prefixes of the mergeable ranks. May or may not be tokens themselves!
@@ -64,7 +64,7 @@ impl Encoding {
64
64
pub fn new (
65
65
name : & str ,
66
66
pat_str : & str ,
67
- mergeable_ranks : HashMap < Vec < u8 > , usize > ,
67
+ mergeable_ranks : Arc < HashMap < Vec < u8 > , usize > > ,
68
68
special_tokens : HashMap < String , usize > ,
69
69
explicit_n_vocab : Option < usize > ,
70
70
) -> Result < Self , EncodingError > {
Original file line number Diff line number Diff line change @@ -5,6 +5,7 @@ use sha2::Sha256;
5
5
// call its methods without adding to the namespace.
6
6
use base64:: engine:: general_purpose:: STANDARD as BASE64 ;
7
7
use base64:: engine:: Engine as _;
8
+ use std:: sync:: Arc ;
8
9
9
10
// define the error
10
11
#[ derive( Debug , Clone ) ]
@@ -16,7 +17,7 @@ pub enum Error {
16
17
pub fn load_tiktoken_bpe (
17
18
tiktoken_bpe_contents : & [ u8 ] ,
18
19
shasum : & str ,
19
- ) -> Result < HashMap < Vec < u8 > , usize > , Error > {
20
+ ) -> Result < Arc < HashMap < Vec < u8 > , usize > > , Error > {
20
21
// check the shasum
21
22
let mut hasher = Sha256 :: new ( ) ;
22
23
hasher. update ( tiktoken_bpe_contents) ;
@@ -42,5 +43,5 @@ pub fn load_tiktoken_bpe(
42
43
map. insert ( token, rank) ;
43
44
}
44
45
map. shrink_to_fit ( ) ;
45
- Ok ( map)
46
+ Ok ( Arc :: new ( map) )
46
47
}
You can’t perform that action at this time.
0 commit comments