@@ -8,12 +8,14 @@ import {
88 decodeUtf8N_LE ,
99 encodeCodePointsToUtf8Into ,
1010 encodeTextToUtf8 ,
11+ encodeTextToUtf8_32 ,
12+ encodeTextToUtf8_32Into ,
1113 encodeTextToUtf8Into ,
1214 encodeUtf8N_BE ,
1315 encodeUtf8N_LE ,
1416 textToCodePoints ,
1517} from '../src/lib/TrieBlob/Utf8.ts' ;
16- import { Utf8Encoder } from '../src/lib/TrieBlob/Utf8Encoder.ts' ;
18+ import { Utf8Encoder , Utf8Encoder2 } from '../src/lib/TrieBlob/Utf8Encoder.ts' ;
1719
// Number of outer-loop repetitions performed by the benchmark cases in this file.
1820const iterations = 1000 ;
// Text returned by sampleText(); presumably the source of the benchmark inputs — confirm against the code that builds `words`.
1921const text = sampleText ( ) ;
@@ -23,6 +25,7 @@ suite('Utf8 encode', async (test) => {
// Fixtures created once inside the suite callback and shared by the benchmark cases.
2325 const encoder = new TextEncoder ( ) ;
// 1024-byte scratch buffer; used as `buffer` by the TextEncoder.encodeInto case.
2426 const scratchBuffer = new Uint8Array ( 1024 ) ;
2527 const utf8Encoder = new Utf8Encoder ( ) ;
// NOTE(review): 1024 is presumably the encoder's buffer size (it matches scratchBuffer) — confirm in Utf8Encoder.ts.
28+ const utf8Encoder2 = new Utf8Encoder2 ( 1024 ) ;
2629
2730 test ( `TextEncoder.encodeInto words (${ words . length } )` , ( ) => {
2831 const buffer = scratchBuffer ;
@@ -124,6 +127,57 @@ suite('Utf8 encode', async (test) => {
124127 }
125128 } ) ;
126129
// Benchmark: run every word through the shared Utf8Encoder2 instance, `iterations` times.
// The encoded results are intentionally discarded; only the call cost is measured.
test(`utf8Encoder2(word) to array words (${words.length})`, () => {
    const wordList = words; // local alias of the suite-level word list
    for (let pass = 0; pass < iterations; pass++) {
        for (const entry of wordList) {
            utf8Encoder2.encode(entry);
        }
    }
});
138+
// Benchmark: convert every word with the local toUtf8Array helper, `iterations` times.
// The returned arrays are discarded.
test(`toUtf8Array(word) to array words (${words.length})`, () => {
    const wordList = words; // local alias of the suite-level word list
    for (let pass = 0; pass < iterations; pass++) {
        for (const entry of wordList) {
            toUtf8Array(entry);
        }
    }
});
147+
// Benchmark: convert every word with the local toCodePoints helper, `iterations` times.
// The returned arrays are discarded.
test(`toCodePoints(word) to array words (${words.length})`, () => {
    const wordList = words; // local alias of the suite-level word list
    for (let pass = 0; pass < iterations; pass++) {
        for (const entry of wordList) {
            toCodePoints(entry);
        }
    }
});
156+
// Benchmark: encode every word into one reused number[] buffer via encodeTextToUtf8_32Into.
// The label names the function that is actually called, so results are attributed correctly
// (it previously read `encodeTextToUtf8PointsInto`, which is not what this case exercises).
test(`encodeTextToUtf8_32Into(word) to array words (${words.length})`, () => {
    const _words = words;
    // A single 100-slot array is reused for every call so that allocation is not part of the measurement.
    const buffer: number[] = new Array(100);
    for (let i = iterations; i > 0; --i) {
        for (const word of _words) {
            encodeTextToUtf8_32Into(word, buffer);
        }
    }
});
166+
// Benchmark: encode every word one value at a time with encodeTextToUtf8_32,
// storing each returned value into a reused number[] buffer.
test(`encodeTextToUtf8_32(word) to array words (${words.length})`, () => {
    const wordList = words; // local alias of the suite-level word list
    const out: number[] = new Array(100);
    for (let pass = 0; pass < iterations; pass++) {
        for (const entry of wordList) {
            const end = entry.length;
            const cursor = { text: entry, offset: 0 };
            let slot = 0;
            // The loop body never touches `cursor.offset` itself; presumably
            // encodeTextToUtf8_32 advances it on each call — confirm in Utf8.ts.
            while (cursor.offset < end) {
                out[slot] = encodeTextToUtf8_32(cursor);
                slot += 1;
            }
        }
    }
});
180+
127181 test ( `encoder.encode(word) to array words (${ words . length } )` , ( ) => {
128182 const _words = words ;
129183 for ( let i = iterations ; i > 0 ; -- i ) {
@@ -361,3 +415,42 @@ function sampleText() {
361415 ` ;
362416 // cspell:enable
363417}
418+
// Single TextEncoder instance reused by encodeChar.
const textEncoder = new TextEncoder();
// Cache from a character to its packed UTF-8 value (filled by toUtf8Array).
// Created with a null prototype so that lookups only ever see keys stored here.
const charMap: Record<string, number> = Object.create(null);

/**
 * Pack the UTF-8 bytes of `char` into one number, with the first byte in the
 * lowest 8 bits and each following byte 8 bits higher.
 *
 * @param char - the text to encode; the packing only holds 4 bytes, so it is
 *   meaningful for a single code point (at most 4 UTF-8 bytes).
 * @returns the packed bytes as a non-negative 32-bit value.
 */
function encodeChar(char: string): number {
    const bytes = textEncoder.encode(char);
    let code = 0;
    // Walk from the last byte to the first so the first byte ends up lowest.
    for (let i = bytes.length - 1; i >= 0; i--) {
        code = (code << 8) | bytes[i];
    }
    // `<<` and `|` work on signed 32-bit integers, so a 4-byte sequence (whose
    // last byte, >= 0x80, lands in the top 8 bits) would otherwise come back
    // negative. `>>> 0` reinterprets the same bits as an unsigned value.
    return code >>> 0;
}
430+
/**
 * Convert `text` into an array holding one packed UTF-8 value per character.
 *
 * The string is spread into an array of characters, and that same array is then
 * overwritten slot by slot with numbers, so no second array is allocated.
 * Packed values are memoised in `charMap`; `encodeChar` runs only on a cache miss.
 *
 * @param text - the text to convert.
 * @returns the array of packed values, one entry per character of `text`.
 */
function toUtf8Array(text: string): number[] {
    const chars: string[] = [...text];
    // Same array object, viewed as the numeric result that is filled in below.
    const packed = chars as unknown as number[];
    const total = chars.length;

    for (let idx = 0; idx < total; ++idx) {
        const ch = chars[idx];
        const cached = charMap[ch];
        if (cached !== undefined) {
            packed[idx] = cached;
            continue;
        }
        const fresh = encodeChar(ch);
        charMap[ch] = fresh;
        packed[idx] = fresh;
    }
    return packed;
}
446+
/**
 * Convert `text` into an array of Unicode code points, one per character.
 *
 * The string is spread into an array of characters, and that same array is then
 * overwritten slot by slot with numbers, so no second array is allocated.
 *
 * @param text - the text to convert.
 * @returns the code points of `text`, in order.
 */
function toCodePoints(text: string): number[] {
    const chars: string[] = [...text];
    // Same array object, viewed as the numeric result that is filled in below.
    const points = chars as unknown as number[];
    const total = chars.length;

    for (let idx = 0; idx < total; ++idx) {
        const point = chars[idx].codePointAt(0);
        // codePointAt yields undefined only for an empty string; map that to 0.
        points[idx] = point ?? 0;
    }
    return points;
}
0 commit comments