Skip to content

Commit 3695286

Browse files
fix(tokenizer): greedy match for complex numbers
1 parent 3f3bbee commit 3695286

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed

src/tokenizer.ts

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@ export interface Token<T = TokenType, V = string> {
1010
// Checks for WGSL-specific `fn foo(` and `var`/`let`/`const` declarations whose name is followed by `:` or `=` (e.g. `var bar:`, `let baz =`, `const qux =`)
1111
const WGSL_REGEX = /\bfn\s+\w+\s*\(|\b(var|let|const)\s+\w+\s*[:=]/
1212

13-
const FLOAT_REGEX = /^(\d+\.\d*|\d*\.\d+)([eEpP][-+]?\d+)?[fFhH]?$/
14-
const INT_REGEX = /^(0[xX][\w\d]+|\d+)[iIuU]?$/
13+
const FLOAT_REGEX = /((\d+\.\d*|\d*\.\d+)([eEpP][-+]?\d+)?|\d+[eEpP][-+]?\d+)[fFhH]?/y
14+
const INT_REGEX = /(0[xX][\w\d]+|\d+)[iIuU]?/y
1515
const BOOL_REGEX = /^(true|false)$/
1616

1717
const ZERO = 48
@@ -36,6 +36,12 @@ const isSpace = (c: number) => isLine(c) || c === TAB || c === SPACE
3636
const isIdent = (c: number) => isAlpha(c) || isDigit(c) || c === UNDERSCORE
3737
const isMacro = (c: number) => c === HASH || c === AT
3838

39+
// Prefix-matches a sticky (`y`) regex at `start` by setting `lastIndex`; technique from https://mrale.ph/blog/2016/11/23/making-less-dart-faster.html
40+
function matchAsPrefix(regex: RegExp, string: string, start: number): string | undefined {
41+
regex.lastIndex = start
42+
return regex.exec(string)?.[0]
43+
}
44+
3945
/**
4046
* Tokenizes a string of GLSL or WGSL code.
4147
*/
@@ -52,9 +58,13 @@ export function tokenize(code: string, index: number = 0): Token[] {
5258
while (isSpace(code.charCodeAt(index))) value += code[index++]
5359
tokens.push({ type: 'whitespace', value })
5460
} else if (isDigit(char) || (char === DOT && isDigit(code.charCodeAt(index)))) {
55-
while (FLOAT_REGEX.test(value + code[index]) || INT_REGEX.test(value + code[index])) value += code[index++]
56-
if (FLOAT_REGEX.test(value)) tokens.push({ type: 'float', value })
57-
else tokens.push({ type: 'int', value })
61+
if ((value = matchAsPrefix(FLOAT_REGEX, code, index - 1)!)) {
62+
index = FLOAT_REGEX.lastIndex
63+
tokens.push({ type: 'float', value })
64+
} else if ((value = matchAsPrefix(INT_REGEX, code, index - 1)!)) {
65+
index = INT_REGEX.lastIndex
66+
tokens.push({ type: 'int', value })
67+
}
5868
} else if (isIdent(char)) {
5969
while (isIdent(code.charCodeAt(index))) value += code[index++]
6070
if (BOOL_REGEX.test(value)) tokens.push({ type: 'bool', value })

tests/tokenizer.test.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ describe('tokenizer', () => {
1919
expect(tokenize('0.0F')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0F' }])
2020
expect(tokenize('0.0h')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0h' }])
2121
expect(tokenize('0.0H')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0H' }])
22-
// expect(tokenize('1.23e3')).toStrictEqual<Token[]>([{ type: 'float', value: '1.23e3' }])
23-
// expect(tokenize('4e-2')).toStrictEqual<Token[]>([{ type: 'float', value: '4e-2' }])
24-
// expect(tokenize('3E+4')).toStrictEqual<Token[]>([{ type: 'float', value: '3E+4' }])
22+
expect(tokenize('1.23e3')).toStrictEqual<Token[]>([{ type: 'float', value: '1.23e3' }])
23+
expect(tokenize('4e-2')).toStrictEqual<Token[]>([{ type: 'float', value: '4e-2' }])
24+
expect(tokenize('3E+4')).toStrictEqual<Token[]>([{ type: 'float', value: '3E+4' }])
2525
})
2626

2727
it('can handle integers', () => {
@@ -30,8 +30,8 @@ describe('tokenizer', () => {
3030
expect(tokenize('0U')).toStrictEqual<Token[]>([{ type: 'int', value: '0U' }])
3131
expect(tokenize('0i')).toStrictEqual<Token[]>([{ type: 'int', value: '0i' }])
3232
expect(tokenize('0I')).toStrictEqual<Token[]>([{ type: 'int', value: '0I' }])
33-
// expect(tokenize('0xFF')).toStrictEqual<Token[]>([{ type: 'int', value: '0xFF' }])
34-
// expect(tokenize('0XFF')).toStrictEqual<Token[]>([{ type: 'int', value: '0XFF' }])
33+
expect(tokenize('0xFF')).toStrictEqual<Token[]>([{ type: 'int', value: '0xFF' }])
34+
expect(tokenize('0XFF')).toStrictEqual<Token[]>([{ type: 'int', value: '0XFF' }])
3535
})
3636

3737
it('can handle identifiers', () => {

0 commit comments

Comments
 (0)