@@ -236,9 +236,13 @@ export class Assembler {
236236 this . localGet ( 'tmp' ) ;
237237 }
238238
239- nextCharCode ( ) {
239+ currCharCode ( ) {
240240 this . globalGet ( 'pos' ) ;
241- this . i32Load8u ( ) ;
241+ this . i32Load8u ( Compiler . INPUT_BUFFER_OFFSET ) ;
242+ }
243+
244+ nextCharCode ( ) {
245+ this . currCharCode ( ) ;
242246 this . incPos ( ) ;
243247 }
244248
@@ -410,6 +414,9 @@ export class Compiler {
410414 asm . addGlobal ( 'pos' , w . valtype . i32 , w . mut . var , ( ) => asm . i32Const ( 0 ) ) ;
411415 asm . addGlobal ( 'sp' , w . valtype . i32 , w . mut . var , ( ) => asm . i32Const ( 0 ) ) ;
412416 asm . addGlobal ( 'cst' , w . valtype . i32 , w . mut . var , ( ) => asm . i32Const ( 0 ) ) ;
417+ asm . addGlobal ( 'cstBase' , w . valtype . i32 , w . mut . var , ( ) =>
418+ asm . i32Const ( Compiler . CST_START_OFFSET ) ,
419+ ) ;
413420 asm . addGlobal ( 'depth' , w . valtype . i32 , w . mut . var , ( ) => asm . i32Const ( 0 ) ) ;
414421
415422 // Reserve a fixed number of imports for debug labels.
@@ -463,16 +470,11 @@ export class Compiler {
463470 exports . push ( w . export_ ( name , [ 0x03 , this . asm . globalidx ( name ) ] ) ) ;
464471 }
465472
466- // Memory layout:
467- // - First page is for input buffer (growing upwards) and origPos stack
468- // (growing downwards).
469- // - Second page is for CST.
470-
471473 const mod = w . module ( [
472474 w . typesec ( types ) ,
473475 w . importsec ( imports ) ,
474476 w . funcsec ( funcs ) ,
475- w . memsec ( [ w . mem ( w . memtype ( w . limits . min ( 8 ) ) ) ] ) ,
477+ w . memsec ( [ w . mem ( w . memtype ( w . limits . min ( 24 ) ) ) ] ) ,
476478 w . globalsec ( globals ) ,
477479 w . exportsec ( exports ) ,
478480 w . codesec ( codes ) ,
@@ -537,17 +539,17 @@ export class Compiler {
537539 asm . i32Const ( 0 ) ;
538540 asm . globalSet ( 'pos' ) ;
539541
540- asm . i32Const ( 64 * 1024 ) ;
542+ asm . i32Const ( Compiler . STACK_START_OFFSET ) ;
541543 asm . globalSet ( 'sp' ) ;
542544
543- asm . i32Const ( 64 * 1024 ) ;
545+ asm . i32Const ( Compiler . CST_START_OFFSET ) ;
544546 asm . globalSet ( 'cst' ) ;
545547
546548 asm . i32Const ( 0 ) ; // offset
547549 asm . i32Const ( 64 * 1024 ) ; // maxLen
548- asm . emit ( instr . call , w . funcidx ( 0 ) ) ;
549-
550+ asm . emit ( instr . call , w . funcidx ( 0 ) ) ; // fillInputBuffer
550551 asm . emit ( instr . local . set , w . localidx ( 0 ) ) ; // set inputLen
552+
551553 asm . emit ( instr . call , this . ruleEvalFuncIdx ( this . grammar . defaultStartRule ) ) ;
552554 asm . ifElse (
553555 w . blocktype . i32 ,
@@ -688,8 +690,7 @@ export class Compiler {
688690 const { asm} = this ;
689691 asm . i32Const ( 0xff ) ;
690692 // Careful! We shouldn't move the pos here. Or does it matter?
691- asm . globalGet ( 'pos' ) ;
692- asm . i32Load8u ( ) ;
693+ asm . currCharCode ( ) ;
693694 asm . emit ( instr . i32 . eq ) ;
694695 asm . localSet ( 'ret' ) ;
695696 }
@@ -794,15 +795,11 @@ export class Compiler {
794795 // - handle longer terminals with a loop
795796
796797 const { asm} = this ;
797- const currCharCode = ( ) => {
798- asm . globalGet ( 'pos' ) ;
799- asm . i32Load8u ( ) ;
800- } ;
801798
802799 for ( const c of [ ...exp . obj ] ) {
803800 // Compare next char
804801 asm . i32Const ( c . charCodeAt ( 0 ) ) ;
805- currCharCode ( ) ;
802+ asm . currCharCode ( ) ;
806803 asm . i32Ne ( ) ;
807804 asm . if ( w . blocktype . empty , ( ) => {
808805 asm . i32Const ( 0 ) ;
@@ -815,6 +812,15 @@ export class Compiler {
815812 asm . localSet ( 'ret' ) ;
816813 }
817814}
815+ // Memory layout:
816+ // - First page is for the PExpr stack (origPos, etc.), growing downwards.
817+ // - Second page is for the input buffer (max 64 KiB for now).
818+ // - Pages 3-18 (incl.) for memo table (4 entries per char, 4 bytes each).
819+ // - Remainder (>18) is for CST (growing upwards).
820+ Compiler . INPUT_BUFFER_OFFSET = 64 * 1024 ; // Offset of the input buffer in memory.
821+ Compiler . STACK_START_OFFSET = 64 * 1024 ; // Starting offset of the stack.
822+ Compiler . MEMO_START_OFFSET = 2 * ( 64 * 1024 ) ; // Starting offset of memo records.
823+ Compiler . CST_START_OFFSET = 18 * ( 64 * 1024 ) ; // Starting offset of CST records.
818824
819825export class WasmMatcher {
820826 constructor ( grammar ) {
@@ -867,8 +873,9 @@ export class WasmMatcher {
867873 _fillInputBuffer ( offset , maxLen ) {
868874 const encoder = new TextEncoder ( ) ;
869875 const { memory} = this . _instance . exports ;
870- const buf = new Uint8Array ( memory . buffer , offset ) ;
876+ const buf = new Uint8Array ( memory . buffer , Compiler . INPUT_BUFFER_OFFSET + offset ) ;
871877 const { read, written} = encoder . encodeInto ( this . _input . substring ( this . _pos ) , buf ) ;
878+ assert ( written < 64 * 1024 , 'Input too long' ) ;
872879 this . _pos += read ;
873880 buf [ written ] = 0xff ; // Mark end of input with an invalid UTF-8 character.
874881 return written ;
0 commit comments