@@ -17,7 +17,7 @@ impl Reader {
17
17
pub fn new ( ) -> Self {
18
18
Self {
19
19
unicode : false ,
20
- src : "" . to_string ( ) ,
20
+ src : "" . to_owned ( ) ,
21
21
index : 0 ,
22
22
end : 0 ,
23
23
cps : VecDeque :: with_capacity ( 4 ) ,
@@ -39,7 +39,7 @@ impl Reader {
39
39
40
40
pub fn reset ( & mut self , source : & str , start : usize , end : usize , u_flag : bool ) {
41
41
self . unicode = u_flag;
42
- self . src = source. into ( ) ;
42
+ self . src = source. to_owned ( ) ;
43
43
self . end = end;
44
44
self . rewind ( start) ;
45
45
}
@@ -109,22 +109,27 @@ impl Reader {
109
109
}
110
110
111
111
fn at ( & self , i : usize ) -> Option < char > {
112
+ //println!("{:?}", self.src.as_bytes());
113
+ //println!("{:?}", self.src.encode_utf16().collect::<Vec<u16>>());
112
114
if i >= self . end {
113
115
None
114
116
} else if self . unicode {
115
117
// TODO: read non ASCII as UTF-8
116
- let c: char = self . src . as_bytes ( ) [ i] . into ( ) ;
118
+ //let c: char = self.src.as_bytes()[i].into();
119
+ let c = self . src . chars ( ) . skip ( i) . next ( ) . unwrap ( ) ;
117
120
Some ( c)
118
121
} else {
119
- // TODO: read non ASCII as UTF-16
120
- let c: char = self . src . as_bytes ( ) [ i] . into ( ) ;
121
- Some ( c)
122
+ // TODO: move the conversion out of this method and make it safe
123
+ unsafe {
124
+ let c: char = std:: char:: from_u32_unchecked ( self . src . encode_utf16 ( ) . skip ( i) . next ( ) . unwrap ( ) as u32 ) ;
125
+ Some ( c)
126
+ }
122
127
}
123
128
}
124
129
125
130
fn width ( & self , c : char ) -> usize {
126
131
if self . unicode && c > '\u{FFFF}' {
127
- 2
132
+ 1
128
133
} else {
129
134
1
130
135
}
@@ -162,12 +167,26 @@ mod tests {
162
167
assert_eq ! ( reader. eat3( 'b' , 'c' , 'd' ) , true ) ;
163
168
}
164
169
165
/// Checks `Reader::at` indexing with and without the unicode flag.
///
/// U+1000C3 (decimal 1048771) is encoded in UTF-16 as the surrogate pair
/// 0xDBC0 (56256) followed by 0xDCC3 (56515). The character is written as an
/// escape so it cannot be silently dropped by editors or tooling.
#[test]
fn at_test_es_compliance() {
    let mut reader = Reader::new();

    // without unicode flag: indices address UTF-16 code units
    // NOTE(review): the surrogate assertions depend on `at` handing back raw
    // code units as `char`; a lone surrogate is not a valid `char` — confirm
    // this is still the intended contract.
    reader.reset("Hello", 0, 5, false);
    assert_eq!(reader.at(1).unwrap() as u32, 101);
    reader.reset("\u{1000C3}a🩢☃★♲", 0, 6, false);
    assert_eq!(reader.at(0).unwrap() as u32, 56256);
    reader.reset("\u{1000C3}ello", 0, 6, false);
    assert_eq!(reader.at(0).unwrap() as u32, 56256);
    assert_eq!(reader.at(1).unwrap() as u32, 56515);

    // with unicode flag: indices address whole code points
    reader.reset("Hello", 0, 5, true);
    assert_eq!(reader.at(1).unwrap() as u32, 101);
    reader.reset("\u{1000C3}a🩢☃★♲", 0, 6, true);
    assert_eq!(reader.at(0).unwrap() as u32, 1048771);
    reader.reset("\u{1000C3}ello", 0, 6, true);
    assert_eq!(reader.at(0).unwrap() as u32, 1048771);
    // Index 1 is the code point after U+1000C3, i.e. 'e', not a low surrogate.
    assert_eq!(reader.at(1).unwrap() as u32, 101);
}
173
192
}
0 commit comments