Skip to content

Commit 21bd514

Browse files
authored
🎨 Convert RegExp objects to Sets (#8)
1 parent d180814 commit 21bd514

File tree

4 files changed

+1619
-438
lines changed

4 files changed

+1619
-438
lines changed

‎scripts/update-unicode-ids.ts

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,21 @@ const logger = console
5151
normalizeRanges(idContinueLarge)
5252

5353
logger.log("Generating code...")
54+
const { set: setStart, ranges: rangesStart } = makeLargePattern(
55+
idStartLarge,
56+
)
57+
58+
const { set: setContinue, ranges: rangesContinue } = makeLargePattern(
59+
idContinueLarge,
60+
)
61+
5462
let code = `${banner}
5563
56-
let largeIdStartPattern: RegExp | null = null;
57-
let largeIdContinuePattern: RegExp | null = null;
64+
let largeIdStartPatternSymbols: Set<number> | null = null
65+
let largeIdStartPatternRanges: [number, number][] | null = null
66+
67+
let largeIdContinuePatternSymbols: Set<number> | null = null
68+
let largeIdContinuePatternRanges: [number, number][] | null = null
5869
5970
export function isIdStart(cp: number): boolean {
6071
${makeSmallCondtion(idStartSmall)}
@@ -65,18 +76,18 @@ export function isIdContinue(cp: number): boolean {
6576
return isLargeIdStart(cp) || isLargeIdContinue(cp)
6677
}
6778
function isLargeIdStart(cp: number): boolean {
68-
if (!largeIdStartPattern) {
69-
largeIdStartPattern = new RegExp(${makeLargePattern(idStartLarge)}, "u")
79+
if (largeIdStartPatternSymbols === null) {
80+
largeIdStartPatternSymbols = ${setStart};
81+
largeIdStartPatternRanges = ${rangesStart};
7082
}
71-
return largeIdStartPattern.test(String.fromCodePoint(cp))
83+
return largeIdStartPatternSymbols.has(cp) || largeIdStartPatternRanges!.some(([r1, r2]) => r1 <= cp && cp <= r2);
7284
}
7385
function isLargeIdContinue(cp: number): boolean {
74-
if (!largeIdContinuePattern) {
75-
largeIdContinuePattern = new RegExp(${makeLargePattern(
76-
idContinueLarge,
77-
)}, "u")
86+
if (largeIdContinuePatternSymbols === null) {
87+
largeIdContinuePatternSymbols = ${setContinue};
88+
largeIdContinuePatternRanges = ${rangesContinue};
7889
}
79-
return largeIdContinuePattern.test(String.fromCodePoint(cp))
90+
return largeIdContinuePatternSymbols.has(cp) || largeIdContinuePatternRanges!.some(([r1, r2]) => r1 <= cp && cp <= r2);
8091
}`
8192

8293
logger.log("Formatting code...")
@@ -147,35 +158,24 @@ function makeSmallCondtion(ranges: [number, number][]): string {
147158
return conditions.join("\n")
148159
}
149160

150-
function makeLargePattern(ranges: [number, number][]): string {
151-
const lines = ["^["]
161+
function makeLargePattern(ranges: [number, number][]) {
162+
const symbols: string[] = []
163+
const symbolRanges: string[] = []
164+
152165
for (const [min, max] of ranges) {
153-
const line = lines[lines.length - 1]
154-
const part =
155-
min === max
156-
? esc(min)
157-
: min + 1 === max
158-
? `${esc(min)}${esc(max)}`
159-
: `${esc(min)}-${esc(max)}`
160-
161-
if (line.length + part.length > 60) {
162-
lines.push(part)
166+
if (min === max) {
167+
symbols.push(`0x${min.toString(16)}`)
168+
} else if (min + 1 === max) {
169+
symbols.push(`0x${min.toString(16)}`, `0x${max.toString(16)}`)
163170
} else {
164-
lines[lines.length - 1] += part
171+
symbolRanges.push(`[0x${min.toString(16)}, 0x${max.toString(16)}]`)
165172
}
166173
}
167-
lines[lines.length - 1] += "]$"
168-
return lines.map(line => `"${line}"`).join("+")
169-
}
170174

171-
function esc(cp: number): string {
172-
if (cp <= 0xff) {
173-
return `\\x${cp.toString(16).padStart(2, "0")}`
174-
}
175-
if (cp <= 0xffff) {
176-
return `\\u${cp.toString(16).padStart(4, "0")}`
175+
return {
176+
set: `new Set([${symbols.join()}])`,
177+
ranges: `[${symbolRanges.join()}]`,
177178
}
178-
return `\\u{${cp.toString(16)}}`
179179
}
180180

181181
function save(content: string): Promise<void> {

‎scripts/update-unicode-properties.ts

Lines changed: 12 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -87,34 +87,34 @@ type Datum = {
8787
logger.log("Generating code...")
8888
let code = `/* This file was generated with ECMAScript specifications. */
8989
90-
const gcNamePattern = /^(?:General_Category|gc)$/u
91-
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
90+
const gcNamePattern = new Set(["General_Category", "gc"])
91+
const scNamePattern = new Set(["Script", "Script_Extensions", "sc", "scx"])
9292
const gcValuePatterns = {
9393
${Array.from(
9494
Object.keys(data),
95-
version => `es${version}: null as RegExp | null,`,
95+
version => `es${version}: null as Set<string> | null,`,
9696
).join("\n")}
9797
}
9898
const scValuePatterns = {
9999
${Array.from(
100100
Object.keys(data),
101-
version => `es${version}: null as RegExp | null,`,
101+
version => `es${version}: null as Set<string> | null,`,
102102
).join("\n")}
103103
}
104104
const binPropertyPatterns = {
105105
${Array.from(
106106
Object.keys(data),
107-
version => `es${version}: null as RegExp | null,`,
107+
version => `es${version}: null as Set<string> | null,`,
108108
).join("\n")}
109109
}
110110
111111
export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
112-
if (gcNamePattern.test(name)) {
112+
if (gcNamePattern.has(name)) {
113113
${Array.from(Object.entries(data), ([version, { gcValues }]) =>
114114
makeVerificationCode(version, "gcValuePatterns", gcValues, 52),
115115
).join("\n")}
116116
}
117-
if (scNamePattern.test(name)) {
117+
if (scNamePattern.has(name)) {
118118
${Array.from(Object.entries(data), ([version, { scValues }]) =>
119119
makeVerificationCode(version, "scValuePatterns", scValues, 52),
120120
).join("\n")}
@@ -175,35 +175,18 @@ function makeVerificationCode(
175175

176176
return `
177177
if (version >= ${version}) {
178-
if (!${patternVar}.es${version}) {
179-
${patternVar}.es${version} = new RegExp(
180-
${makeRegExpPatternCode(values, maxLen)},
181-
"u"
182-
)
178+
if (${patternVar}.es${version} === null) {
179+
${patternVar}.es${version} = new Set([${values
180+
.map(v => `"${v}"`)
181+
.join()}])
183182
}
184-
if (${patternVar}.es${version}.test(value)) {
183+
if (${patternVar}.es${version}.has(value)) {
185184
return true
186185
}
187186
}
188187
`
189188
}
190189

191-
function makeRegExpPatternCode(names: string[], maxLen: number): string {
192-
const lines = ["^(?:"]
193-
for (const name of names) {
194-
const line = lines[lines.length - 1]
195-
const part = `${name}|`
196-
197-
if (line.length + part.length > maxLen) {
198-
lines.push(part)
199-
} else {
200-
lines[lines.length - 1] += part
201-
}
202-
}
203-
lines[lines.length - 1] = `${lines[lines.length - 1].replace(/\|$/u, "")})$`
204-
return lines.map(line => `"${line}"`).join("+")
205-
}
206-
207190
function save(content: string): Promise<void> {
208191
return new Promise((resolve, reject) => {
209192
fs.writeFile(FILE_PATH, content, error =>

0 commit comments

Comments
 (0)