Skip to content

Commit a0815f9

Browse files
committed
Tokenizer performance tweaks:
* Don't explode string into character array; use global counter
* Replace string.match() with regex.test()
* Expand some trivial regular expressions to plain char matching
1 parent d804a04 commit a0815f9

File tree

1 file changed

+50
-52
lines changed

1 file changed

+50
-52
lines changed

src/parse.js

Lines changed: 50 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@ export const Tok = {
1717
Alpha: 'alpha'
1818
};
1919

20+
let _i = 0;
21+
2022
/*
2123
4.3.10. Check if three code points would start a number
2224
https://drafts.csswg.org/css-syntax/#starts-with-a-number
2325
*/
2426
function is_num(chars) {
25-
let ch = chars[chars._i];
26-
let ch1 = chars[chars._i + 1];
27+
let ch = chars[_i];
28+
let ch1 = chars[_i + 1];
2729
if (ch === '-' || ch === '+') {
28-
return (
29-
/\d/.test(ch1) || (ch1 === '.' && /\d/.test(chars[chars._i + 2]))
30-
);
30+
return /\d/.test(ch1) || (ch1 === '.' && /\d/.test(chars[_i + 2]));
3131
}
3232
if (ch === '.') {
3333
return /\d/.test(ch1);
@@ -40,19 +40,19 @@ function is_num(chars) {
4040
*/
4141

4242
function is_ident(chars) {
43-
if (chars._i >= chars.length) {
43+
if (_i >= chars.length) {
4444
return false;
4545
}
46-
let ch = chars[chars._i];
47-
if (ch.match(IdentStartCodePoint)) {
46+
let ch = chars[_i];
47+
if (IdentStartCodePoint.test(ch)) {
4848
return true;
4949
}
5050
if (ch === '-') {
51-
if (chars.length - chars._i < 2) {
51+
if (chars.length - _i < 2) {
5252
return false;
5353
}
54-
let ch1 = chars[chars._i + 1];
55-
if (ch1.match(IdentStartCodePoint) || ch1 === '-') {
54+
let ch1 = chars[_i + 1];
55+
if (ch1 === '-' || IdentStartCodePoint.test(ch1)) {
5656
return true;
5757
}
5858
return false;
@@ -74,32 +74,32 @@ const huenits = {
7474

7575
function num(chars) {
7676
let value = '';
77-
if (/[+-]/.test(chars[chars._i])) {
78-
value += chars[chars._i++];
77+
if (chars[_i] === '-' || chars[_i] === '+') {
78+
value += chars[_i++];
7979
}
8080
value += digits(chars);
81-
if (chars[chars._i] === '.' && /\d/.test(chars[chars._i + 1])) {
82-
value += chars[chars._i++] + digits(chars);
81+
if (chars[_i] === '.' && /\d/.test(chars[_i + 1])) {
82+
value += chars[_i++] + digits(chars);
8383
}
84-
if (/e/i.test(chars[chars._i])) {
84+
if (chars[_i] === 'e' || chars[_i] === 'E') {
8585
if (
86-
/[+-]/.test(chars[chars._i + 1]) &&
87-
/\d/.test(chars[chars._i + 2])
86+
(chars[_i + 1] === '-' || chars[_i + 1] === '+') &&
87+
/\d/.test(chars[_i + 2])
8888
) {
89-
value += chars[chars._i++] + chars[chars._i++] + digits(chars);
90-
} else if (/\d/.test(chars[chars._i + 1])) {
91-
value += chars[chars._i++] + digits(chars);
89+
value += chars[_i++] + chars[_i++] + digits(chars);
90+
} else if (/\d/.test(chars[_i + 1])) {
91+
value += chars[_i++] + digits(chars);
9292
}
9393
}
9494
if (is_ident(chars)) {
9595
let id = ident(chars);
96-
if (/deg|rad|turn|grad/.test(id)) {
96+
if (id === 'deg' || id === 'rad' || id === 'turn' || id === 'grad') {
9797
return { type: Tok.Hue, value: value * huenits[id] };
9898
}
9999
return undefined;
100100
}
101-
if (chars[chars._i] === '%') {
102-
chars._i++;
101+
if (chars[_i] === '%') {
102+
_i++;
103103
return { type: Tok.Percentage, value: +value };
104104
}
105105
return { type: Tok.Number, value: +value };
@@ -110,8 +110,8 @@ function num(chars) {
110110
*/
111111
function digits(chars) {
112112
let v = '';
113-
while (/\d/.test(chars[chars._i])) {
114-
v += chars[chars._i++];
113+
while (/\d/.test(chars[_i])) {
114+
v += chars[_i++];
115115
}
116116
return v;
117117
}
@@ -121,8 +121,8 @@ function digits(chars) {
121121
*/
122122
function ident(chars) {
123123
let v = '';
124-
while (chars._i < chars.length && chars[chars._i].match(IdentCodePoint)) {
125-
v += chars[chars._i++];
124+
while (_i < chars.length && IdentCodePoint.test(chars[_i])) {
125+
v += chars[_i++];
126126
}
127127
return v;
128128
}
@@ -132,8 +132,8 @@ function ident(chars) {
132132
*/
133133
function identlike(chars) {
134134
let v = ident(chars);
135-
if (chars[chars._i] === '(') {
136-
chars._i++;
135+
if (chars[_i] === '(') {
136+
_i++;
137137
return { type: Tok.Function, value: v };
138138
}
139139
if (v === 'none') {
@@ -143,25 +143,25 @@ function identlike(chars) {
143143
}
144144

145145
export function tokenize(str = '') {
146-
let chars = str.trim().split('');
147-
chars._i = 0;
146+
let chars = str.trim();
148147
let tokens = [];
149148
let ch;
150149

151-
while (chars._i < chars.length) {
152-
ch = chars[chars._i++];
150+
/* reset counter */
151+
_i = 0;
152+
153+
while (_i < chars.length) {
154+
ch = chars[_i++];
153155

154156
/*
155157
Consume whitespace without emitting it
156158
*/
157159
if (ch === '\n' || ch === '\t' || ch === ' ') {
158160
while (
159-
chars._i < chars.length &&
160-
(chars[chars._i] === '\n' ||
161-
chars[chars._i] === '\t' ||
162-
chars[chars._i] === ' ')
161+
_i < chars.length &&
162+
(chars[_i] === '\n' || chars[_i] === '\t' || chars[_i] === ' ')
163163
) {
164-
chars._i++;
164+
_i++;
165165
}
166166
continue;
167167
}
@@ -177,7 +177,7 @@ export function tokenize(str = '') {
177177

178178
if (ch === '+') {
179179
if (is_num(chars)) {
180-
chars._i--;
180+
_i--;
181181
tokens.push(num(chars));
182182
continue;
183183
}
@@ -186,11 +186,11 @@ export function tokenize(str = '') {
186186

187187
if (ch === '-') {
188188
if (is_num(chars)) {
189-
chars._i--;
189+
_i--;
190190
tokens.push(num(chars));
191191
continue;
192192
} else if (is_ident(chars)) {
193-
chars._i--;
193+
_i--;
194194
tokens.push({ type: Tok.Ident, value: ident(chars) });
195195
continue;
196196
}
@@ -199,7 +199,7 @@ export function tokenize(str = '') {
199199

200200
if (ch === '.') {
201201
if (is_num(chars)) {
202-
chars._i--;
202+
_i--;
203203
tokens.push(num(chars));
204204
continue;
205205
}
@@ -208,12 +208,10 @@ export function tokenize(str = '') {
208208

209209
if (ch === '/') {
210210
while (
211-
chars._i < chars.length &&
212-
(chars[chars._i] === '\n' ||
213-
chars[chars._i] === '\t' ||
214-
chars[chars._i] === ' ')
211+
_i < chars.length &&
212+
(chars[_i] === '\n' || chars[_i] === '\t' || chars[_i] === ' ')
215213
) {
216-
chars._i++;
214+
_i++;
217215
}
218216
let alpha;
219217
if (is_num(chars)) {
@@ -235,14 +233,14 @@ export function tokenize(str = '') {
235233
return undefined;
236234
}
237235

238-
if (ch.match(/\d/)) {
239-
chars._i--;
236+
if (/\d/.test(ch)) {
237+
_i--;
240238
tokens.push(num(chars));
241239
continue;
242240
}
243241

244-
if (ch.match(IdentStartCodePoint)) {
245-
chars._i--;
242+
if (IdentStartCodePoint.test(ch)) {
243+
_i--;
246244
tokens.push(identlike(chars));
247245
continue;
248246
}

0 commit comments

Comments
 (0)