Skip to content

Commit 599f11a

Browse files
authored
perf: inline character classes in hot paths (#106)
1 parent 9228801 commit 599f11a

File tree

2 files changed

+64
-22
lines changed

2 files changed

+64
-22
lines changed

src/char-types.ts

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,14 @@
2121
// - Cache-friendly (128 bytes fits in L1 cache)
2222
// - Supports overlapping categories (e.g., '5' is both digit and hex)
2323

24-
let CHAR_ALPHA = 1 << 0 // 1
25-
let CHAR_DIGIT = 1 << 1 // 2
26-
let CHAR_HEX = 1 << 2 // 4
27-
let CHAR_WHITESPACE = 1 << 3 // 8
28-
let CHAR_NEWLINE = 1 << 4 // 16
24+
export let CHAR_ALPHA = 1 << 0 // 1
25+
export let CHAR_DIGIT = 1 << 1 // 2
26+
export let CHAR_HEX = 1 << 2 // 4
27+
export let CHAR_WHITESPACE = 1 << 3 // 8
28+
export let CHAR_NEWLINE = 1 << 4 // 16
2929

3030
// Lookup table for ASCII characters (0-127)
31-
let char_types = new Uint8Array(128)
31+
export let char_types = new Uint8Array(128)
3232

3333
// Initialize digit characters (0-9)
3434
for (let i = 0x30; i <= 0x39; i++) {

src/tokenize.ts

Lines changed: 58 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,14 @@
1-
import { is_digit, is_hex_digit, is_ident_start, is_ident_char, is_whitespace, is_newline } from './char-types'
1+
import {
2+
is_hex_digit,
3+
is_ident_start,
4+
is_ident_char,
5+
is_whitespace,
6+
is_newline,
7+
char_types,
8+
CHAR_DIGIT,
9+
CHAR_WHITESPACE,
10+
CHAR_NEWLINE,
11+
} from './char-types'
212
import {
313
TOKEN_IDENT,
414
TOKEN_FUNCTION,
@@ -101,7 +111,8 @@ export class Lexer {
101111
if (skip_whitespace) {
102112
while (this.pos < this.source.length) {
103113
let ch = this.source.charCodeAt(this.pos)
104-
if (!is_whitespace(ch) && !is_newline(ch)) break
114+
// Hot path: inline whitespace/newline check in tight loop
115+
if (ch >= 128 || (char_types[ch] & (CHAR_WHITESPACE | CHAR_NEWLINE)) === 0) break
105116
this.advance()
106117
}
107118
}
@@ -147,7 +158,8 @@ export class Lexer {
147158
}
148159

149160
// Whitespace
150-
if (is_whitespace(ch) || is_newline(ch)) {
161+
// Hot path: inline whitespace/newline check
162+
if (ch < 128 && (char_types[ch] & (CHAR_WHITESPACE | CHAR_NEWLINE)) !== 0) {
151163
return this.consume_whitespace(start_line, start_column)
152164
}
153165

@@ -176,12 +188,17 @@ export class Lexer {
176188
}
177189

178190
// Numbers: digit or . followed by digit
179-
if (is_digit(ch)) {
191+
// Hot path: inline digit check to eliminate function call overhead
192+
if (ch < 128 && (char_types[ch] & CHAR_DIGIT) !== 0) {
180193
return this.consume_number(start_line, start_column)
181194
}
182195

183-
if (ch === CHAR_DOT && is_digit(this.peek())) {
184-
return this.consume_number(start_line, start_column)
196+
// Hot path: inline digit check for decimal detection
197+
if (ch === CHAR_DOT) {
198+
let next = this.peek()
199+
if (next < 128 && (char_types[next] & CHAR_DIGIT) !== 0) {
200+
return this.consume_number(start_line, start_column)
201+
}
185202
}
186203

187204
// CDO: <!--
@@ -234,9 +251,17 @@ export class Lexer {
234251
// Hyphen/Plus: could be signed number like -5 or +5
235252
if (ch === CHAR_HYPHEN || ch === CHAR_PLUS) {
236253
let next = this.peek()
237-
if (is_digit(next) || (next === CHAR_DOT && is_digit(this.peek(2)))) {
254+
// Hot path: inline digit checks for signed number detection
255+
let is_next_digit = next < 128 && (char_types[next] & CHAR_DIGIT) !== 0
256+
if (is_next_digit) {
238257
return this.consume_number(start_line, start_column)
239258
}
259+
if (next === CHAR_DOT) {
260+
let next2 = this.peek(2)
261+
if (next2 < 128 && (char_types[next2] & CHAR_DIGIT) !== 0) {
262+
return this.consume_number(start_line, start_column)
263+
}
264+
}
240265
}
241266

242267
// Default: delimiter
@@ -248,7 +273,8 @@ export class Lexer {
248273
let start = this.pos
249274
while (this.pos < this.source.length) {
250275
let ch = this.source.charCodeAt(this.pos)
251-
if (!is_whitespace(ch) && !is_newline(ch)) break
276+
// Hot path: inline whitespace/newline check in tight loop
277+
if (ch >= 128 || (char_types[ch] & (CHAR_WHITESPACE | CHAR_NEWLINE)) === 0) break
252278
this.advance()
253279
}
254280
return this.make_token(TOKEN_WHITESPACE, start, this.pos, start_line, start_column)
@@ -341,20 +367,29 @@ export class Lexer {
341367
}
342368

343369
// Integer part
344-
while (this.pos < this.source.length && is_digit(this.source.charCodeAt(this.pos))) {
370+
// Hot path: inline digit check in tight loop
371+
while (this.pos < this.source.length) {
372+
let ch = this.source.charCodeAt(this.pos)
373+
if (ch >= 128 || (char_types[ch] & CHAR_DIGIT) === 0) break
345374
this.advance()
346375
}
347376

348377
// Decimal part
378+
// Hot path: inline digit check for decimal detection
349379
if (
350380
this.pos < this.source.length &&
351381
this.source.charCodeAt(this.pos) === CHAR_DOT &&
352-
this.pos + 1 < this.source.length &&
353-
is_digit(this.peek())
382+
this.pos + 1 < this.source.length
354383
) {
355-
this.advance() // .
356-
while (this.pos < this.source.length && is_digit(this.source.charCodeAt(this.pos))) {
357-
this.advance()
384+
let next = this.peek()
385+
if (next < 128 && (char_types[next] & CHAR_DIGIT) !== 0) {
386+
this.advance() // .
387+
// Hot path: inline digit check in tight loop
388+
while (this.pos < this.source.length) {
389+
let ch = this.source.charCodeAt(this.pos)
390+
if (ch >= 128 || (char_types[ch] & CHAR_DIGIT) === 0) break
391+
this.advance()
392+
}
358393
}
359394
}
360395

@@ -363,15 +398,22 @@ export class Lexer {
363398
let ch = this.source.charCodeAt(this.pos)
364399
if (ch === CHAR_LOWERCASE_E || ch === CHAR_UPPERCASE_E) {
365400
let next = this.peek()
366-
if (is_digit(next) || ((next === CHAR_PLUS || next === CHAR_HYPHEN) && is_digit(this.peek(2)))) {
401+
// Hot path: inline digit checks for exponent detection
402+
let is_next_digit = next < 128 && (char_types[next] & CHAR_DIGIT) !== 0
403+
let next2 = this.peek(2)
404+
let is_next2_digit = next2 < 128 && (char_types[next2] & CHAR_DIGIT) !== 0
405+
if (is_next_digit || ((next === CHAR_PLUS || next === CHAR_HYPHEN) && is_next2_digit)) {
367406
this.advance() // e or E
368407
if (this.pos < this.source.length) {
369408
let sign = this.source.charCodeAt(this.pos)
370409
if (sign === CHAR_PLUS || sign === CHAR_HYPHEN) {
371410
this.advance() // + or -
372411
}
373412
}
374-
while (this.pos < this.source.length && is_digit(this.source.charCodeAt(this.pos))) {
413+
// Hot path: inline digit check in tight loop
414+
while (this.pos < this.source.length) {
415+
let ch = this.source.charCodeAt(this.pos)
416+
if (ch >= 128 || (char_types[ch] & CHAR_DIGIT) === 0) break
375417
this.advance()
376418
}
377419
}

0 commit comments

Comments
 (0)