@@ -31,9 +31,19 @@ pub(crate) struct Utf16Char {
3131}
3232
3333impl Utf16Char {
34+ /// Returns the length in 8-bit UTF-8 code units.
3435 fn len ( & self ) -> TextSize {
3536 self . end - self . start
3637 }
38+
39+ /// Returns the length in 16-bit UTF-16 code units.
40+ fn len_utf16 ( & self ) -> usize {
41+ if self . len ( ) == TextSize :: from ( 4 ) {
42+ 2
43+ } else {
44+ 1
45+ }
46+ }
3747}
3848
3949impl LineIndex {
@@ -110,7 +120,7 @@ impl LineIndex {
110120 if let Some ( utf16_chars) = self . utf16_lines . get ( & line) {
111121 for c in utf16_chars {
112122 if c. end <= col {
113- res -= usize:: from ( c. len ( ) ) - 1 ;
123+ res -= usize:: from ( c. len ( ) ) - c . len_utf16 ( ) ;
114124 } else {
115125 // From here on, all utf16 characters come *after* the character we are mapping,
116126 // so we don't need to take them into account
@@ -125,7 +135,7 @@ impl LineIndex {
125135 if let Some ( utf16_chars) = self . utf16_lines . get ( & line) {
126136 for c in utf16_chars {
127137 if col > u32:: from ( c. start ) {
128- col += u32:: from ( c. len ( ) ) - 1 ;
138+ col += u32:: from ( c. len ( ) ) - c . len_utf16 ( ) as u32 ;
129139 } else {
130140 // From here on, all utf16 characters come *after* the character we are mapping,
131141 // so we don't need to take them into account
@@ -204,6 +214,9 @@ const C: char = 'メ';
204214
205215 // UTF-16 to UTF-8
206216 assert_eq ! ( col_index. utf16_to_utf8_col( 1 , 19 ) , TextSize :: from( 21 ) ) ;
217+
218+ let col_index = LineIndex :: new ( "a𐐏b" ) ;
219+ assert_eq ! ( col_index. utf16_to_utf8_col( 0 , 3 ) , TextSize :: from( 5 ) ) ;
207220 }
208221
209222 #[ test]
0 commit comments