File tree Expand file tree Collapse file tree 1 file changed +26
-0
lines changed
graalpython/lib-graalpython/patches/tokenizers Expand file tree Collapse file tree 1 file changed +26
-0
lines changed Original file line number Diff line number Diff line change @@ -22,3 +22,29 @@ index 6282c31..47e6b12 100644
22
22
23
23
[features]
24
24
default = ["pyo3/extension-module"]
25
+ diff --git a/tokenizers-lib/src/models/bpe/trainer.rs b/tokenizers-lib/src/models/bpe/trainer.rs
26
+ index 43ab848..55f95f8 100644
27
+ --- a/tokenizers-lib/src/models/bpe/trainer.rs
28
+ +++ b/tokenizers-lib/src/models/bpe/trainer.rs
29
+ @@ -518,15 +518,16 @@ impl BpeTrainer {
30
+ let changes = top
31
+ .pos
32
+ .maybe_par_iter()
33
+ - .flat_map(|i| {
34
+ - let w = &words[*i] as *const _ as *mut _;
35
+ + .flat_map(|&i| {
36
+ + let word = &words[i] as *const _ as *mut Word;
37
+ // We can merge each of these words in parallel here because each position
38
+ // can be there only once (HashSet). So this is safe.
39
+ unsafe {
40
+ - let word: &mut Word = &mut (*w);
41
+ - word.merge(top.pair.0, top.pair.1, new_token_id)
42
+ + // let word: &mut Word = &mut (*word);
43
+ + (*word)
44
+ + .merge(top.pair.0, top.pair.1, new_token_id)
45
+ .into_iter()
46
+ - .map(|c| (c, *i))
47
+ + .map(|c| (c, i))
48
+ .collect::<Vec<_>>()
49
+ }
50
+ })
You can’t perform that action at this time.
0 commit comments