Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/char-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@
// - Cache-friendly (the 128-byte lookup table fits in L1 cache)
// - Supports overlapping categories (e.g., '5' is both digit and hex)

let CHAR_ALPHA = 1 << 0 // 1
let CHAR_DIGIT = 1 << 1 // 2
let CHAR_HEX = 1 << 2 // 4
let CHAR_WHITESPACE = 1 << 3 // 8
let CHAR_NEWLINE = 1 << 4 // 16
export let CHAR_ALPHA = 1 << 0 // 1
export let CHAR_DIGIT = 1 << 1 // 2
export let CHAR_HEX = 1 << 2 // 4
export let CHAR_WHITESPACE = 1 << 3 // 8
export let CHAR_NEWLINE = 1 << 4 // 16

// Lookup table for ASCII characters (0-127)
let char_types = new Uint8Array(128)
export let char_types = new Uint8Array(128)

// Initialize digit characters (0-9)
for (let i = 0x30; i <= 0x39; i++) {
Expand Down
74 changes: 58 additions & 16 deletions src/tokenize.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
import { is_digit, is_hex_digit, is_ident_start, is_ident_char, is_whitespace, is_newline } from './char-types'
import {
is_hex_digit,
is_ident_start,
is_ident_char,
is_whitespace,
is_newline,
char_types,
CHAR_DIGIT,
CHAR_WHITESPACE,
CHAR_NEWLINE,
} from './char-types'
import {
TOKEN_IDENT,
TOKEN_FUNCTION,
Expand Down Expand Up @@ -101,7 +111,8 @@ export class Lexer {
if (skip_whitespace) {
while (this.pos < this.source.length) {
let ch = this.source.charCodeAt(this.pos)
if (!is_whitespace(ch) && !is_newline(ch)) break
// Hot path: inline whitespace/newline check in tight loop (char_types covers only ASCII 0-127, so code units >= 128 end the run)
if (ch >= 128 || (char_types[ch] & (CHAR_WHITESPACE | CHAR_NEWLINE)) === 0) break
this.advance()
}
}
Expand Down Expand Up @@ -147,7 +158,8 @@ export class Lexer {
}

// Whitespace
if (is_whitespace(ch) || is_newline(ch)) {
// Hot path: inline whitespace/newline check
if (ch < 128 && (char_types[ch] & (CHAR_WHITESPACE | CHAR_NEWLINE)) !== 0) {
return this.consume_whitespace(start_line, start_column)
}

Expand Down Expand Up @@ -176,12 +188,17 @@ export class Lexer {
}

// Numbers: digit or . followed by digit
if (is_digit(ch)) {
// Hot path: inline digit check to eliminate function call overhead
if (ch < 128 && (char_types[ch] & CHAR_DIGIT) !== 0) {
return this.consume_number(start_line, start_column)
}

if (ch === CHAR_DOT && is_digit(this.peek())) {
return this.consume_number(start_line, start_column)
// Hot path: inline digit check for decimal detection
if (ch === CHAR_DOT) {
let next = this.peek()
if (next < 128 && (char_types[next] & CHAR_DIGIT) !== 0) {
return this.consume_number(start_line, start_column)
}
}

// CDO: <!--
Expand Down Expand Up @@ -234,9 +251,17 @@ export class Lexer {
// Hyphen/Plus: could be signed number like -5 or +5
if (ch === CHAR_HYPHEN || ch === CHAR_PLUS) {
let next = this.peek()
if (is_digit(next) || (next === CHAR_DOT && is_digit(this.peek(2)))) {
// Hot path: inline digit checks for signed number detection
let is_next_digit = next < 128 && (char_types[next] & CHAR_DIGIT) !== 0
if (is_next_digit) {
return this.consume_number(start_line, start_column)
}
if (next === CHAR_DOT) {
let next2 = this.peek(2)
if (next2 < 128 && (char_types[next2] & CHAR_DIGIT) !== 0) {
return this.consume_number(start_line, start_column)
}
}
}

// Default: delimiter
Expand All @@ -248,7 +273,8 @@ export class Lexer {
let start = this.pos
while (this.pos < this.source.length) {
let ch = this.source.charCodeAt(this.pos)
if (!is_whitespace(ch) && !is_newline(ch)) break
// Hot path: inline whitespace/newline check in tight loop
if (ch >= 128 || (char_types[ch] & (CHAR_WHITESPACE | CHAR_NEWLINE)) === 0) break
this.advance()
}
return this.make_token(TOKEN_WHITESPACE, start, this.pos, start_line, start_column)
Expand Down Expand Up @@ -341,20 +367,29 @@ export class Lexer {
}

// Integer part
while (this.pos < this.source.length && is_digit(this.source.charCodeAt(this.pos))) {
// Hot path: inline digit check in tight loop
while (this.pos < this.source.length) {
let ch = this.source.charCodeAt(this.pos)
if (ch >= 128 || (char_types[ch] & CHAR_DIGIT) === 0) break
this.advance()
}

// Decimal part
// Hot path: inline digit check for decimal detection
if (
this.pos < this.source.length &&
this.source.charCodeAt(this.pos) === CHAR_DOT &&
this.pos + 1 < this.source.length &&
is_digit(this.peek())
this.pos + 1 < this.source.length
) {
this.advance() // .
while (this.pos < this.source.length && is_digit(this.source.charCodeAt(this.pos))) {
this.advance()
let next = this.peek()
if (next < 128 && (char_types[next] & CHAR_DIGIT) !== 0) {
this.advance() // .
// Hot path: inline digit check in tight loop
while (this.pos < this.source.length) {
let ch = this.source.charCodeAt(this.pos)
if (ch >= 128 || (char_types[ch] & CHAR_DIGIT) === 0) break
this.advance()
}
}
}

Expand All @@ -363,15 +398,22 @@ export class Lexer {
let ch = this.source.charCodeAt(this.pos)
if (ch === CHAR_LOWERCASE_E || ch === CHAR_UPPERCASE_E) {
let next = this.peek()
if (is_digit(next) || ((next === CHAR_PLUS || next === CHAR_HYPHEN) && is_digit(this.peek(2)))) {
// Hot path: inline digit checks for exponent detection
let is_next_digit = next < 128 && (char_types[next] & CHAR_DIGIT) !== 0
let next2 = this.peek(2)
let is_next2_digit = next2 < 128 && (char_types[next2] & CHAR_DIGIT) !== 0
if (is_next_digit || ((next === CHAR_PLUS || next === CHAR_HYPHEN) && is_next2_digit)) {
this.advance() // e or E
if (this.pos < this.source.length) {
let sign = this.source.charCodeAt(this.pos)
if (sign === CHAR_PLUS || sign === CHAR_HYPHEN) {
this.advance() // + or -
}
}
while (this.pos < this.source.length && is_digit(this.source.charCodeAt(this.pos))) {
// Hot path: inline digit check in tight loop
while (this.pos < this.source.length) {
let ch = this.source.charCodeAt(this.pos)
if (ch >= 128 || (char_types[ch] & CHAR_DIGIT) === 0) break
this.advance()
}
}
Expand Down