Skip to content

Commit aa866e8

Browse files
committed
Change memory layout to make room for memo table.
1 parent 2f5c107 commit aa866e8

File tree

2 files changed

+33
-25
lines changed

2 files changed

+33
-25
lines changed

packages/wasm/src/index.js

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -236,9 +236,13 @@ export class Assembler {
236236
this.localGet('tmp');
237237
}
238238

239-
nextCharCode() {
239+
currCharCode() {
240240
this.globalGet('pos');
241-
this.i32Load8u();
241+
this.i32Load8u(Compiler.INPUT_BUFFER_OFFSET);
242+
}
243+
244+
nextCharCode() {
245+
this.currCharCode();
242246
this.incPos();
243247
}
244248

@@ -410,6 +414,9 @@ export class Compiler {
410414
asm.addGlobal('pos', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
411415
asm.addGlobal('sp', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
412416
asm.addGlobal('cst', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
417+
asm.addGlobal('cstBase', w.valtype.i32, w.mut.var, () =>
418+
asm.i32Const(Compiler.CST_START_OFFSET),
419+
);
413420
asm.addGlobal('depth', w.valtype.i32, w.mut.var, () => asm.i32Const(0));
414421

415422
// Reserve a fixed number of imports for debug labels.
@@ -463,16 +470,11 @@ export class Compiler {
463470
exports.push(w.export_(name, [0x03, this.asm.globalidx(name)]));
464471
}
465472

466-
// Memory layout:
467-
// - First page is for input buffer (growing upwards) and origPos stack
468-
// (growing downwards).
469-
// - Second page is for CST.
470-
471473
const mod = w.module([
472474
w.typesec(types),
473475
w.importsec(imports),
474476
w.funcsec(funcs),
475-
w.memsec([w.mem(w.memtype(w.limits.min(8)))]),
477+
w.memsec([w.mem(w.memtype(w.limits.min(24)))]),
476478
w.globalsec(globals),
477479
w.exportsec(exports),
478480
w.codesec(codes),
@@ -537,17 +539,17 @@ export class Compiler {
537539
asm.i32Const(0);
538540
asm.globalSet('pos');
539541

540-
asm.i32Const(64 * 1024);
542+
asm.i32Const(Compiler.STACK_START_OFFSET);
541543
asm.globalSet('sp');
542544

543-
asm.i32Const(64 * 1024);
545+
asm.i32Const(Compiler.CST_START_OFFSET);
544546
asm.globalSet('cst');
545547

546548
asm.i32Const(0); // offset
547549
asm.i32Const(64 * 1024); // maxLen
548-
asm.emit(instr.call, w.funcidx(0));
549-
550+
asm.emit(instr.call, w.funcidx(0)); // fillInputBuffer
550551
asm.emit(instr.local.set, w.localidx(0)); // set inputLen
552+
551553
asm.emit(instr.call, this.ruleEvalFuncIdx(this.grammar.defaultStartRule));
552554
asm.ifElse(
553555
w.blocktype.i32,
@@ -688,8 +690,7 @@ export class Compiler {
688690
const {asm} = this;
689691
asm.i32Const(0xff);
690692
// Careful! We shouldn't move the pos here. Or does it matter?
691-
asm.globalGet('pos');
692-
asm.i32Load8u();
693+
asm.currCharCode();
693694
asm.emit(instr.i32.eq);
694695
asm.localSet('ret');
695696
}
@@ -794,15 +795,11 @@ export class Compiler {
794795
// - handle longer terminals with a loop
795796

796797
const {asm} = this;
797-
const currCharCode = () => {
798-
asm.globalGet('pos');
799-
asm.i32Load8u();
800-
};
801798

802799
for (const c of [...exp.obj]) {
803800
// Compare next char
804801
asm.i32Const(c.charCodeAt(0));
805-
currCharCode();
802+
asm.currCharCode();
806803
asm.i32Ne();
807804
asm.if(w.blocktype.empty, () => {
808805
asm.i32Const(0);
@@ -815,6 +812,15 @@ export class Compiler {
815812
asm.localSet('ret');
816813
}
817814
}
815+
// Memory layout:
816+
// - First page is for the PExpr stack (origPos, etc.), growing downwards.
817+
// - Second page is for the input buffer (max 64k for now).
818+
// - Pages 3-18 (incl.) are for the memo table (4 entries per char, 4 bytes each).
819+
// - Remainder (>18) is for CST (growing upwards).
820+
Compiler.INPUT_BUFFER_OFFSET = 64 * 1024; // Offset of the input buffer in memory.
821+
Compiler.STACK_START_OFFSET = 64 * 1024; // Starting offset of the stack.
822+
Compiler.MEMO_START_OFFSET = 2 * (64 * 1024); // Starting offset of memo records.
823+
Compiler.CST_START_OFFSET = 18 * (64 * 1024); // Starting offset of CST records.
818824

819825
export class WasmMatcher {
820826
constructor(grammar) {
@@ -867,8 +873,9 @@ export class WasmMatcher {
867873
_fillInputBuffer(offset, maxLen) {
868874
const encoder = new TextEncoder();
869875
const {memory} = this._instance.exports;
870-
const buf = new Uint8Array(memory.buffer, offset);
876+
const buf = new Uint8Array(memory.buffer, Compiler.INPUT_BUFFER_OFFSET + offset);
871877
const {read, written} = encoder.encodeInto(this._input.substring(this._pos), buf);
878+
assert(written < 64 * 1024, 'Input too long');
872879
this._pos += read;
873880
buf[written] = 0xff; // Mark end of input with an invalid UTF-8 character.
874881
return written;

packages/wasm/test/test-wasm.js

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ const matchWithInput = (m, str) => (m.setInput(str), m.match());
99
const indented = (d, str) => new Array(d * 2).join(' ') + str;
1010

1111
const BYTES_PER_CST_REC = 8;
12-
const CST_BASE = 64 * 1024;
1312

1413
function rawCst(matcher) {
1514
const view = new DataView(matcher._instance.exports.memory.buffer);
15+
const cstBase = matcher._instance.exports.cstBase.value;
1616
const cstTop = matcher._instance.exports.cst.value;
1717
const ans = [];
18-
for (let offset = CST_BASE; offset < cstTop; offset += BYTES_PER_CST_REC) {
18+
for (let offset = cstBase; offset < cstTop; offset += BYTES_PER_CST_REC) {
1919
const depth = view.getUint32(offset, true);
2020
const matchLen = view.getUint32(offset + 4, true);
2121
ans.push([depth, matchLen]);
@@ -25,13 +25,14 @@ function rawCst(matcher) {
2525

2626
// eslint-disable-next-line no-unused-vars
2727
function cstToString(matcher, input) {
28-
const top = matcher._instance.exports.cst.value;
28+
const cstBase = matcher._instance.exports.cstBase.value;
29+
const cstTop = matcher._instance.exports.cst.value;
2930

3031
const view = new DataView(matcher._instance.exports.memory.buffer);
3132
let pos = 0;
3233
const tree = [[0, -1, 0]];
3334
const lines = [];
34-
for (let p = CST_BASE; p < top; p += BYTES_PER_CST_REC) {
35+
for (let p = cstBase; p < cstTop; p += BYTES_PER_CST_REC) {
3536
// const [lastDepth, lastMatchLen] = depthStack.at(-1);
3637

3738
const depth = view.getUint32(p, true);
@@ -54,7 +55,7 @@ test('input in memory', async t => {
5455
matcher.setInput('ohm');
5556
matcher.match(); // Trigger fillInputBuffer
5657

57-
const view = new DataView(matcher._instance.exports.memory.buffer);
58+
const view = new DataView(matcher._instance.exports.memory.buffer, 64 * 1024);
5859
t.is(view.getUint8(0), 'ohm'.charCodeAt(0));
5960
t.is(view.getUint8(1), 'ohm'.charCodeAt(1));
6061
t.is(view.getUint8(2), 'ohm'.charCodeAt(2));

0 commit comments

Comments
 (0)