File tree Expand file tree Collapse file tree 1 file changed +14
-10
lines changed
Expand file tree Collapse file tree 1 file changed +14
-10
lines changed Original file line number Diff line number Diff line change @@ -45,14 +45,18 @@ impl Tokenizer {
4545
4646 pub fn decode ( & self , token : utok ) -> Cow < str > {
4747 let piece = self . tokenize . decode ( token) ;
48- let ans = piece
49- . chars ( )
50- . map ( |c| * self . de_replace . get ( & c) . unwrap_or ( & c) )
51- . collect :: < String > ( ) ;
52- if ans == piece {
53- piece. into ( )
48+ if let Ok ( piece) = from_utf8 ( piece) {
49+ let ans = piece
50+ . chars ( )
51+ . map ( |c| * self . de_replace . get ( & c) . unwrap_or ( & c) )
52+ . collect :: < String > ( ) ;
53+ if ans == piece {
54+ piece. into ( )
55+ } else {
56+ ans. into ( )
57+ }
5458 } else {
55- ans . into ( )
59+ unsafe { from_utf8_unchecked ( piece ) } . into ( )
5660 }
5761 }
5862
@@ -133,7 +137,7 @@ trait Tokenize {
133137 /// Encode a text into a sequence of tokens.
134138 fn encode ( & self , text : & str ) -> Vec < utok > ;
135139 /// Decode a token into str.
136- fn decode ( & self , token : utok ) -> & str ;
140+ fn decode ( & self , token : utok ) -> & [ u8 ] ;
137141}
138142
139143impl < M : tokeneer:: Method > Tokenize for Tokeneer < M > {
@@ -142,8 +146,8 @@ impl<M: tokeneer::Method> Tokenize for Tokeneer<M> {
142146 self . encode ( text)
143147 }
144148 #[ inline]
145- fn decode ( & self , token : utok ) -> & str {
146- unsafe { from_utf8_unchecked ( self . internal ( ) . decode ( token) ) }
149+ fn decode ( & self , token : utok ) -> & [ u8 ] {
150+ self . internal ( ) . decode ( token)
147151 }
148152}
149153
You can’t perform that action at this time.
0 commit comments