
Commit 4182b4d

fix(tokenizer): aggregate shorthand floats, better EOL handling in comments
1 parent e0dcff2

File tree: 4 files changed (+83 additions, -13 deletions)

src/constants.ts

Lines changed: 4 additions & 4 deletions
@@ -99,7 +99,7 @@ export const WGSL_KEYWORDS = [
   'discard',
   'else',
   'enable',
-  'false',
+  // 'false',
   'fn',
   'for',
   'if',
@@ -109,7 +109,7 @@ export const WGSL_KEYWORDS = [
   'return',
   'struct',
   'switch',
-  'true',
+  // 'true',
   'var',
   'while',

@@ -501,8 +501,8 @@ export const GLSL_KEYWORDS = [
   'int',
   'void',
   'bool',
-  'true',
-  'false',
+  // 'true',
+  // 'false',
   'invariant',
   'discard',
   'return',
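
Commenting 'true' and 'false' out of both keyword lists means the tokenizer no longer classifies booleans as keywords; they now surface under their own 'bool' token type, which the new tests below assert. A minimal sketch of the observable change (token shapes copied from tests/tokenizer.test.ts):

import { tokenize } from 'shaderkit'

// Booleans are no longer 'keyword' tokens in either dialect:
tokenize('true')  // [{ type: 'bool', value: 'true' }]
tokenize('false') // [{ type: 'bool', value: 'false' }]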

src/tokenizer.ts

Lines changed: 4 additions & 3 deletions
@@ -23,6 +23,7 @@ const CR = 13
 const TAB = 9
 const SPACE = 32
 const UNDERSCORE = 95
+const DOT = 46
 const SLASH = 47
 const STAR = 42
 const HASH = 35
@@ -50,7 +51,7 @@ export function tokenize(code: string, index: number = 0): Token[] {
     if (isSpace(char)) {
       while (isSpace(code.charCodeAt(index))) value += code[index++]
       tokens.push({ type: 'whitespace', value })
-    } else if (isDigit(char)) {
+    } else if (isDigit(char) || (char === DOT && isDigit(code.charCodeAt(index)))) {
       while (isFloat(value + code[index]) || isInt(value + code[index])) value += code[index++]
       if (isFloat(value)) tokens.push({ type: 'float', value })
       else tokens.push({ type: 'int', value })
@@ -62,8 +63,8 @@ export function tokenize(code: string, index: number = 0): Token[] {
       else tokens.push({ type: 'identifier', value })
     } else if (char === SLASH && (code.charCodeAt(index) === SLASH || code.charCodeAt(index) === STAR)) {
       const terminator = code.charCodeAt(index) === STAR ? '*/' : '\n'
-      while (!value.endsWith(terminator)) value += code[index++]
-      tokens.push({ type: 'comment', value })
+      while (index < code.length && !value.endsWith(terminator)) value += code[index++]
+      tokens.push({ type: 'comment', value: value.trim() })
     } else {
       for (const symbol of SYMBOLS) {
         if (symbol.length > value.length && code.startsWith(symbol, index - 1)) value = symbol
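
Two fixes land in this hunk, matching the commit message. First, a number may now start with a bare '.' (char code 46), so shorthand floats like '.0' aggregate into a single float token instead of splitting into a symbol and an int. Second, the comment loop is bounded by the input length, so a single-line comment at EOF without a trailing newline no longer scans past the end of the string, and value.trim() keeps the terminator out of the token value. A sketch of both effects, mirroring the new assertions in tests/tokenizer.test.ts:

import { tokenize } from 'shaderkit'

// Shorthand float: '.' followed by a digit now begins a float token.
tokenize('.0') // [{ type: 'float', value: '.0' }]
tokenize('0.') // [{ type: 'float', value: '0.' }]

// Comment at EOF: the loop stops at code.length instead of waiting for
// a '\n' that never arrives, and the value carries no trailing newline.
tokenize('// comment') // [{ type: 'comment', value: '// comment' }]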

tests/__snapshots__/index.test.ts.snap

Lines changed: 3 additions & 6 deletions
@@ -226,8 +226,7 @@ exports[`tokenize > can tokenize GLSL 1`] = `
   },
   {
     "type": "comment",
-    "value": "// single line
-",
+    "value": "// single line",
   },
   {
     "type": "whitespace",
@@ -330,8 +329,7 @@ exports[`tokenize > can tokenize GLSL 1`] = `
   },
   {
     "type": "comment",
-    "value": "// inline comment
-",
+    "value": "// inline comment",
   },
   {
     "type": "whitespace",
@@ -1703,8 +1701,7 @@ exports[`tokenize > can tokenize WGSL 1`] = `
   },
   {
     "type": "comment",
-    "value": "// single line
-",
+    "value": "// single line",
   },
   {
     "type": "whitespace",

tests/tokenizer.test.ts

Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
+import { describe, it, expect } from 'vitest'
+import { tokenize, type Token, GLSL_KEYWORDS, GLSL_SYMBOLS, WGSL_KEYWORDS, WGSL_SYMBOLS } from 'shaderkit'
+
+describe('tokenizer', () => {
+  it('can handle whitespace', () => {
+    expect(tokenize(' \n\t')).toStrictEqual<Token[]>([{ type: 'whitespace', value: ' \n\t' }])
+  })
+
+  it('can handle comments', () => {
+    expect(tokenize('// comment')).toStrictEqual<Token[]>([{ type: 'comment', value: '// comment' }])
+    expect(tokenize('/* comment */')).toStrictEqual<Token[]>([{ type: 'comment', value: '/* comment */' }])
+  })
+
+  it('can handle floats', () => {
+    expect(tokenize('0.0')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0' }])
+    expect(tokenize('0.')).toStrictEqual<Token[]>([{ type: 'float', value: '0.' }])
+    expect(tokenize('.0')).toStrictEqual<Token[]>([{ type: 'float', value: '.0' }])
+    expect(tokenize('0.0f')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0f' }])
+    expect(tokenize('0.0F')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0F' }])
+    expect(tokenize('0.0h')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0h' }])
+    expect(tokenize('0.0H')).toStrictEqual<Token[]>([{ type: 'float', value: '0.0H' }])
+    // expect(tokenize('1.23e3')).toStrictEqual<Token[]>([{ type: 'float', value: '1.23e3' }])
+    // expect(tokenize('4e-2')).toStrictEqual<Token[]>([{ type: 'float', value: '4e-2' }])
+    // expect(tokenize('3E+4')).toStrictEqual<Token[]>([{ type: 'float', value: '3E+4' }])
+  })
+
+  it('can handle integers', () => {
+    expect(tokenize('0')).toStrictEqual<Token[]>([{ type: 'int', value: '0' }])
+    expect(tokenize('0u')).toStrictEqual<Token[]>([{ type: 'int', value: '0u' }])
+    expect(tokenize('0U')).toStrictEqual<Token[]>([{ type: 'int', value: '0U' }])
+    expect(tokenize('0i')).toStrictEqual<Token[]>([{ type: 'int', value: '0i' }])
+    expect(tokenize('0I')).toStrictEqual<Token[]>([{ type: 'int', value: '0I' }])
+    // expect(tokenize('0xFF')).toStrictEqual<Token[]>([{ type: 'int', value: '0xFF' }])
+    // expect(tokenize('0XFF')).toStrictEqual<Token[]>([{ type: 'int', value: '0XFF' }])
+  })
+
+  it('can handle identifiers', () => {
+    expect(tokenize('test')).toStrictEqual<Token[]>([{ type: 'identifier', value: 'test' }])
+  })
+
+  it('can handle booleans', () => {
+    expect(tokenize('true')).toStrictEqual<Token[]>([{ type: 'bool', value: 'true' }])
+    expect(tokenize('false')).toStrictEqual<Token[]>([{ type: 'bool', value: 'false' }])
+  })
+
+  it('can handle identifiers', () => {
+    expect(tokenize('test')).toStrictEqual<Token[]>([{ type: 'identifier', value: 'test' }])
+  })
+
+  it('can handle reserved words', () => {
+    // NOTE: language detection is context-sensitive
+    for (const keyword of WGSL_KEYWORDS) {
+      expect(tokenize(`var test: type;${keyword}`).slice(-1)).toStrictEqual<Token[]>([
+        { type: 'keyword', value: keyword.replace('@', '') },
+      ])
+    }
+    for (const keyword of GLSL_KEYWORDS) {
+      expect(tokenize(keyword).slice(-1)).toStrictEqual<Token[]>([{ type: 'keyword', value: keyword.replace('#', '') }])
+    }
+  })
+
+  it('can handle symbols', () => {
+    // NOTE: language detection is context-sensitive
+    const comments = /\/\/|\/\*|\*\//
+    for (const symbol of WGSL_SYMBOLS.filter((s) => !comments.test(s))) {
+      expect(tokenize(`var test: type;${symbol}`).slice(-1)).toStrictEqual<Token[]>([{ type: 'symbol', value: symbol }])
+    }
+    for (const symbol of GLSL_SYMBOLS.filter((s) => !comments.test(s))) {
+      expect(tokenize(symbol).slice(-1)).toStrictEqual<Token[]>([{ type: 'symbol', value: symbol }])
+    }
+  })
+})
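
The 'var test: type;' prefix in the reserved-word and symbol tests is what the NOTE comments point at: language detection is context-sensitive, so WGSL-only tokens are exercised behind WGSL-looking syntax and only the final token is inspected. A sketch of the pattern, reusing the 'fn' keyword visible in the WGSL_KEYWORDS diff above:

import { tokenize } from 'shaderkit'

// The WGSL-style prefix steers detection toward WGSL; slice(-1) drops
// the prefix tokens and isolates the token under test.
tokenize('var test: type;fn').slice(-1) // [{ type: 'keyword', value: 'fn' }]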
