Skip to content

Commit 08a0695

Browse files
Guy Bedford authored and JakeChampion committed
feat: replace tree-sitter with acorn + magic string
1 parent 97d65aa commit 08a0695

File tree

3 files changed

+76
-452
lines changed

3 files changed

+76
-452
lines changed

package.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,16 @@
3939
"brittle": "^3.1.1",
4040
"eslint": "^8.28.0",
4141
"get-bin-path": "^7.2.1",
42+
"magic-string": "^0.30.0",
4243
"tsd": "^0.25.0",
4344
"typescript": "^4.9"
4445
},
4546
"dependencies": {
4647
"@bytecodealliance/jco": "^0.5.2",
4748
"@bytecodealliance/wizer": "^1.6.1-beta.4",
49+
"acorn": "^8.8.2",
50+
"acorn-walk": "^8.2.0",
4851
"esbuild": "^0.15.16",
49-
"regexpu-core": "^5.3.1",
50-
"tree-sitter": "^0.20.1",
51-
"tree-sitter-javascript": "^0.19.0"
52+
"regexpu-core": "^5.3.1"
5253
}
5354
}

src/precompile.js

Lines changed: 47 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,57 @@
1-
import Parser, { Query } from "tree-sitter";
2-
import JavaScript from "tree-sitter-javascript";
3-
import regexpuc from 'regexpu-core';
1+
import regexpuc from "regexpu-core";
2+
import { parse } from "acorn";
3+
import MagicString from "magic-string";
4+
import { simple as simpleWalk } from "acorn-walk";
45

5-
function findRegexLiterals(source) {
6-
const parser = new Parser();
7-
parser.setLanguage(JavaScript);
6+
const PREAMBLE = `;{ const precompileRegex = (r) => { r.exec('a'); r.exec('\\u1000'); }; `;
7+
const POSTAMBLE = "; };";
88

9-
const tree = parser.parse(source);
10-
const query = new Query(
11-
JavaScript,
12-
"(regex pattern: (regex_pattern) @pattern flags: (regex_flags)? @flags)"
13-
);
14-
const regexLiterals = [];
15-
for (const m of query.matches(tree.rootNode)) {
16-
const pattern = m.captures[0].node.text;
17-
const flags = m.captures[1]?.node.text || "";
18-
// transpile unicode property escapes
19-
let patternTranspiled;
20-
try {
21-
patternTranspiled = regexpuc(pattern, flags, { unicodePropertyEscapes: 'transform' });
22-
} catch {
23-
// swallow regex parse errors here to instead throw them at the engine level
24-
// this then also avoids regex parser bugs being thrown unnecessarily
25-
patternTranspiled = pattern;
26-
}
27-
regexLiterals.push({
28-
patternStart: m.captures[0].node.startIndex,
29-
patternEnd: m.captures[0].node.endIndex,
30-
pattern,
31-
patternTranspiled,
32-
flags,
33-
flagsStart: m.captures[1]?.node.startIndex,
34-
flagsEnd: m.captures[1]?.node.endIndex,
35-
});
36-
}
37-
return regexLiterals;
38-
}
39-
40-
const PREAMBLE = `;{
41-
// Precompiled regular expressions
42-
const precompile = (r) => { r.exec('a'); r.exec('\\u1000'); };`;
43-
const POSTAMBLE = "}";
44-
45-
// TODO: This should also detect and update sourcemaps if they are present, otherwise the sourcemaps would be incorrect.
46-
// We could use https://github.com/rich-harris/magic-string to create and/or update sourcemaps
47-
//
/// Emit a block of javascript that will pre-compile the regular expressions given. As spidermonkey
/// will intern regular expressions, duplicating them at the top level and testing them with both
/// an ascii and utf8 string should ensure that they won't be re-compiled when run in the fetch
/// handler.
///
/// `source` is parsed as a script at the latest supported ECMAScript version. Every regular
/// expression literal found is rewritten in place with its unicode property escapes transformed,
/// and a call exercising that same literal is added to a block prepended to the output.
/// `filename` is recorded on the MagicString instance so it is available once source maps are
/// generated from it.
///
/// Returns `source` unchanged when it contains no regular expression literals, otherwise the
/// transformed source text. Throws whatever the parser throws if `source` does not parse.
export function precompile(source, filename = "<input>") {
  const magicString = new MagicString(source, {
    filename,
  });

  const ast = parse(source, {
    ecmaVersion: "latest",
    sourceType: "script",
  });

  const precompileCalls = [];
  simpleWalk(ast, {
    Literal(node) {
      // only regular expression literals carry a `regex` property
      if (!node.regex) return;
      const { pattern, flags } = node.regex;
      let transpiledPattern;
      try {
        transpiledPattern = regexpuc(pattern, flags, {
          unicodePropertyEscapes: "transform",
        });
      } catch {
        // swallow regex parse errors here to instead throw them at the engine level
        // this then also avoids regex parser bugs being thrown unnecessarily
        transpiledPattern = pattern;
      }
      const transpiledRegex = `/${transpiledPattern}/${flags}`;
      // the helper name must match the one declared in PREAMBLE
      precompileCalls.push(`precompileRegex(${transpiledRegex})`);
      magicString.overwrite(node.start, node.end, transpiledRegex);
    },
  });

  if (!precompileCalls.length) return source;

  // by keeping this a one-liner, source maps will align since they use line offsets
  magicString.prepend(`${PREAMBLE}${precompileCalls.join(";")}${POSTAMBLE}`);

  // TODO: when we're ready to pipe in source maps, build them from the same instance with
  // magicString.generateMap({ source, file, includeContent: true }).

  return magicString.toString();
}

0 commit comments

Comments
 (0)