|
1 |
| -import Parser, { Query } from "tree-sitter"; |
2 |
| -import JavaScript from "tree-sitter-javascript"; |
3 |
| -import regexpuc from 'regexpu-core'; |
| 1 | +import regexpuc from "regexpu-core"; |
| 2 | +import { parse } from "acorn"; |
| 3 | +import MagicString from "magic-string"; |
| 4 | +import { simple as simpleWalk } from "acorn-walk"; |
4 | 5 |
|
5 |
| -function findRegexLiterals(source) { |
6 |
| - const parser = new Parser(); |
7 |
| - parser.setLanguage(JavaScript); |
// Opening half of the injected warm-up code: starts a block scope (so the
// helper does not leak into the program being rewritten) and declares
// `precompileRegex`, which runs each regex once against an ASCII string and
// once against a non-ASCII string. Kept on a single line, with a trailing
// space, so it can be placed in front of the program without adding lines.
const PREAMBLE =
  ";{ const precompileRegex = (r) => { r.exec('a'); r.exec('\\u1000'); }; ";

// Closing half of the injected warm-up code: ends the block opened by PREAMBLE.
const POSTAMBLE = "; };";
8 | 8 |
|
9 |
| - const tree = parser.parse(source); |
10 |
| - const query = new Query( |
11 |
| - JavaScript, |
12 |
| - "(regex pattern: (regex_pattern) @pattern flags: (regex_flags)? @flags)" |
13 |
| - ); |
14 |
| - const regexLiterals = []; |
15 |
| - for (const m of query.matches(tree.rootNode)) { |
16 |
| - const pattern = m.captures[0].node.text; |
17 |
| - const flags = m.captures[1]?.node.text || ""; |
18 |
| - // transpile unicode property escapes |
19 |
| - let patternTranspiled; |
20 |
| - try { |
21 |
| - patternTranspiled = regexpuc(pattern, flags, { unicodePropertyEscapes: 'transform' }); |
22 |
| - } catch { |
23 |
| - // swallow regex parse errors here to instead throw them at the engine level |
24 |
| - // this then also avoids regex parser bugs being thrown unnecessarily |
25 |
| - patternTranspiled = pattern; |
26 |
| - } |
27 |
| - regexLiterals.push({ |
28 |
| - patternStart: m.captures[0].node.startIndex, |
29 |
| - patternEnd: m.captures[0].node.endIndex, |
30 |
| - pattern, |
31 |
| - patternTranspiled, |
32 |
| - flags, |
33 |
| - flagsStart: m.captures[1]?.node.startIndex, |
34 |
| - flagsEnd: m.captures[1]?.node.endIndex, |
35 |
| - }); |
36 |
| - } |
37 |
| - return regexLiterals; |
38 |
| -} |
39 |
| - |
40 |
| -const PREAMBLE = `;{ |
41 |
| - // Precompiled regular expressions |
42 |
| - const precompile = (r) => { r.exec('a'); r.exec('\\u1000'); };`; |
43 |
| -const POSTAMBLE = "}"; |
44 |
| - |
45 |
| -// TODO: This should also detect and update sourcemaps if they are present, otherwise the sourcemaps would be incorrect. |
46 |
| -// We could use https://github.com/rich-harris/magic-string to create and/or update sourcemaps |
47 |
| -// |
/// Emit a block of javascript that will pre-compile the regular expressions given. As spidermonkey
/// will intern regular expressions, duplicating them at the top level and testing them with both
/// an ascii and utf8 string should ensure that they won't be re-compiled when run in the fetch
/// handler.
///
/// Every regex literal found in `source` is additionally passed through regexpu-core with
/// `unicodePropertyEscapes: "transform"`, and the literal in the program is overwritten with the
/// transpiled form so that the warmed-up regex and the one used at runtime are textually identical.
///
/// @param {string} source - JavaScript program text; parsed as a script (not a module).
/// @param {string} [filename="<input>"] - Name handed to MagicString for the input.
/// @returns {string} `source` itself when it contains no regex literals, otherwise the rewritten
///   program with the single-line warm-up block prepended.
/// Any error thrown while parsing `source` propagates to the caller.
export function precompile(source, filename = "<input>") {
  const magicString = new MagicString(source, {
    filename,
  });

  const ast = parse(source, {
    ecmaVersion: "latest",
    sourceType: "script",
  });

  const precompileCalls = [];
  simpleWalk(ast, {
    Literal(node) {
      // Only regex literals carry a `regex` property; skip strings, numbers, etc.
      if (!node.regex) return;
      const { pattern, flags } = node.regex;

      let transpiledPattern;
      try {
        transpiledPattern = regexpuc(pattern, flags, {
          unicodePropertyEscapes: "transform",
        });
      } catch {
        // swallow regex parse errors here to instead throw them at the engine level
        // this then also avoids regex parser bugs being thrown unnecessarily
        transpiledPattern = pattern;
      }

      const transpiledRegex = `/${transpiledPattern}/${flags}`;
      // The helper name must match the function declared in PREAMBLE.
      precompileCalls.push(`precompileRegex(${transpiledRegex});`);
      magicString.overwrite(node.start, node.end, transpiledRegex);
    },
  });

  if (precompileCalls.length === 0) return source;

  // by keeping this a one-liner, source maps will align since they use line offsets
  // (each call already ends with ";", so no extra separator is needed)
  magicString.prepend(`${PREAMBLE}${precompileCalls.join("")}${POSTAMBLE}`);

  // TODO: once callers can consume source maps, produce one here with
  // magicString.generateMap({ source, file, includeContent: true }).

  return magicString.toString();
}
|
0 commit comments