Skip to content

Commit 705476c

Browse files
committed
add attribute selector parser
1 parent e7ce1ad commit 705476c

File tree

3 files changed

+328
-0
lines changed

3 files changed

+328
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import { bench, describe } from 'vitest'
2+
import * as AttributeSelectorParser from './attribute-selector-parser'
3+
4+
// Selectors fed to every benchmark below: bare attributes, unquoted and quoted
// values, optional whitespace, and the `i` sensitivity flag.
const examples = [
  '[open]',
  '[data-foo]',
  '[data-state=expanded]',
  '[data-state = expanded ]',
  '[data-state*="expanded"]',
  '[data-state*="expanded"i]',
  '[data-state*=expanded i]',
]

// Regex-based baseline that the hand-written parser is measured against.
// `\4` is a numeric backreference to the 4th capturing group, which is the
// `quote` group (1: attribute, 2: the unnamed operator/value wrapper,
// 3: operator, 4: quote), so the closing quote must match the opening one.
const ATTRIBUTE_REGEX =
  /\[\s*(?<attribute>[a-zA-Z_-][a-zA-Z0-9_-]*)\s*((?<operator>[*|~^$]?=)\s*(?<quote>['"])?\s*(?<value>.*?)\4\s*(?<sensitivity>[is])?\s*)?\]/

describe('parsing', () => {
  bench('AttributeSelectorParser.parse', () => {
    for (let example of examples) {
      AttributeSelectorParser.parse(example)
    }
  })

  // Labelled after the method that is actually invoked (`exec`, not `test`).
  bench('REGEX.exec(…)', () => {
    for (let example of examples) {
      ATTRIBUTE_REGEX.exec(example)
    }
  })

  bench('….match(REGEX)', () => {
    for (let example of examples) {
      example.match(ATTRIBUTE_REGEX)
    }
  })
})
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { parse } from './attribute-selector-parser'
3+
4+
describe('parse', () => {
  // Inputs that must be rejected: missing brackets, missing attribute name,
  // unterminated quote, split operator, missing value, bad sensitivity flag.
  let invalidInputs: string[][] = [
    '',
    ']',
    '[]',
    '[',
    '="value"',
    'data-foo]',
    '[data-foo',
    '[data-foo="foo]',
    '[data-foo * = foo]',
    '[data-foo*=]',
    '[data-foo=value x]',
    '[data-foo=value ix]',
  ].map((input) => [input])

  it.each(invalidInputs)(
    'should parse an invalid attribute selector (%s) as `null`',
    (input) => {
      let result = parse(input)
      expect(result).toBeNull()
    },
  )

  // Shape of a selector that only names an attribute; the cases below
  // override the fields that differ.
  let bare = { operator: null, quote: null, value: null, sensitivity: null }

  let validCases: Array<[string, Record<string, string | null>]> = [
    ['[data-foo]', { ...bare, attribute: 'data-foo' }],
    ['[ data-foo ]', { ...bare, attribute: 'data-foo' }],
    ['[data-state=expanded]', { ...bare, attribute: 'data-state', operator: '=', value: 'expanded' }],
    [
      '[data-state = expanded ]',
      { ...bare, attribute: 'data-state', operator: '=', value: 'expanded' },
    ],
    [
      '[data-state*="expanded"]',
      { ...bare, attribute: 'data-state', operator: '*=', quote: '"', value: 'expanded' },
    ],
    [
      '[data-state*="expanded"i]',
      {
        attribute: 'data-state',
        operator: '*=',
        quote: '"',
        value: 'expanded',
        sensitivity: 'i',
      },
    ],
    [
      '[data-state*=expanded i]',
      { ...bare, attribute: 'data-state', operator: '*=', value: 'expanded', sensitivity: 'i' },
    ],
  ]

  it.each(validCases)('should parse correctly: %s', (selector, expected) => {
    let result = parse(selector)
    expect(result).toEqual(expected)
  })

  it('should work with a real-world example', () => {
    let result = parse('[data-url$=".com"i]')

    expect(result).toEqual({
      attribute: 'data-url',
      operator: '$=',
      quote: '"',
      value: '.com',
      sensitivity: 'i',
    })
  })
})
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
// Character codes compared against `input.charCodeAt(…)` while scanning.

// Whitespace
const TAB = 9
const LINE_BREAK = 10
const CARRIAGE_RETURN = 13
const SPACE = 32

// Quotes and escapes
const DOUBLE_QUOTE = 34
const SINGLE_QUOTE = 39
const BACKSLASH = 92

// Operator characters
const DOLLAR = 36
const ASTERISK = 42
const EQUALS = 61
const CARET = 94
const PIPE = 124
const TILDE = 126

// Sensitivity flags
const UPPER_I = 73
const UPPER_S = 83
const LOWER_I = 105
const LOWER_S = 115

// Attribute name characters
const LOWER_A = 97
const LOWER_Z = 122
const UPPER_A = 65
const UPPER_Z = 90
const ZERO = 48
const NINE = 57
const DASH = 45
const UNDERSCORE = 95

/** The parts of a single attribute selector such as `[data-state*="open" i]`. */
interface AttributeSelector {
  /** Attribute name exactly as written in the input (escapes are not decoded). */
  attribute: string
  /** Matching operator, or `null` for a bare `[attribute]` selector. */
  operator: '=' | '~=' | '|=' | '^=' | '$=' | '*=' | null
  /** Quote character that wrapped the value, or `null` when unquoted/absent. */
  quote: '"' | "'" | null
  /** Value without its surrounding quotes, or `null` for a bare `[attribute]`. */
  value: string | null
  /** Normalised (lowercase) sensitivity flag, or `null` when absent. */
  sensitivity: 'i' | 's' | null
}

type Operator = NonNullable<AttributeSelector['operator']>
type Quote = NonNullable<AttributeSelector['quote']>
type Sensitivity = NonNullable<AttributeSelector['sensitivity']>

/**
 * Parse a single attribute selector, e.g. `[data-foo]`, `[data-foo=bar]` or
 * `[data-foo*="bar" i]`.
 *
 * @param input The full selector text, including the surrounding brackets.
 * @returns The parsed parts, or `null` when `input` is not a well-formed
 *   attribute selector (missing brackets, empty attribute name, unknown
 *   operator, missing value, unterminated quoted value, or trailing characters
 *   other than a single `i`/`s` flag).
 */
export function parse(input: string): AttributeSelector | null {
  // Must start with `[` and end with `]`
  if (input[0] !== '[' || input[input.length - 1] !== ']') {
    return null
  }

  // Index of the closing `]`. Every scan below stops here at the latest.
  let end = input.length - 1
  let i = 1

  // Skip whitespace, e.g.: [   data-foo]
  //                         ^^^
  while (isAsciiWhitespace(input.charCodeAt(i))) i++

  // Attribute name, e.g.: [data-foo]
  //                        ^^^^^^^^
  let start = i
  for (; i < end; i++) {
    let code = input.charCodeAt(i)

    // Skip escaped character
    if (code === BACKSLASH) {
      i++
      continue
    }

    if (code >= UPPER_A && code <= UPPER_Z) continue
    if (code >= LOWER_A && code <= LOWER_Z) continue
    if (code >= ZERO && code <= NINE) continue
    if (code === DASH || code === UNDERSCORE) continue
    break
  }

  // Must have at least one character in the attribute name. `i > end` happens
  // when a trailing backslash escaped the closing `]`, which leaves the
  // selector unterminated, e.g.: `[data-foo\]`
  if (i === start || i > end) {
    return null
  }

  let attribute = input.slice(start, i)

  // Skip whitespace, e.g.: [data-foo   =value]
  //                                 ^^^
  while (isAsciiWhitespace(input.charCodeAt(i))) i++

  // At the end, e.g.: `[data-foo]`
  if (i === end) {
    return { attribute, operator: null, quote: null, value: null, sensitivity: null }
  }

  // Operator, e.g.: [data-foo*=value]
  //                          ^^
  let operator: Operator
  let operatorChar = input.charCodeAt(i)
  if (operatorChar === EQUALS) {
    operator = '='
    i++
  } else if (input.charCodeAt(i + 1) === EQUALS) {
    switch (operatorChar) {
      case TILDE:
        operator = '~='
        break
      case PIPE:
        operator = '|='
        break
      case CARET:
        operator = '^='
        break
      case DOLLAR:
        operator = '$='
        break
      case ASTERISK:
        operator = '*='
        break
      default:
        return null // Invalid operator
    }
    i += 2
  } else {
    return null // Invalid operator
  }

  // Skip whitespace, e.g.: [data-foo*=   value]
  //                                   ^^^
  while (isAsciiWhitespace(input.charCodeAt(i))) i++

  // At the end, that means that we have an operator but no value, which is
  // invalid, e.g.: `[data-foo*=]`
  if (i === end) {
    return null
  }

  // Value, e.g.: [data-foo*=value]
  //                         ^^^^^
  let value: string
  let quote: Quote | null = null
  let quoteChar = input.charCodeAt(i)

  // Quoted value, e.g.: [data-foo*="value"]
  //                                ^^^^^^^
  if (quoteChar === SINGLE_QUOTE || quoteChar === DOUBLE_QUOTE) {
    quote = quoteChar === DOUBLE_QUOTE ? '"' : "'"
    start = i + 1

    // Find the first unescaped quote that matches the opening one.
    let close = -1
    for (let j = start; j < end; j++) {
      let code = input.charCodeAt(j)

      // Skip escaped character
      if (code === BACKSLASH) {
        j++
        continue
      }

      // Found ending quote
      if (code === quoteChar) {
        close = j
        break
      }
    }

    // Unterminated quoted value, e.g.: `[data-foo="value]`
    if (close === -1) {
      return null
    }

    value = input.slice(start, close)
    i = close + 1
  }

  // Unquoted value, e.g.: [data-foo*=value]
  //                                  ^^^^^
  else {
    start = i
    // Keep going until we find whitespace or the end
    while (i < end && !isAsciiWhitespace(input.charCodeAt(i))) i++
    value = input.slice(start, i)
  }

  // Skip whitespace, e.g.: [data-foo*=value   ]
  //                                        ^^^
  while (isAsciiWhitespace(input.charCodeAt(i))) i++

  // At the end, e.g.: `[data-foo=value]`
  if (i === end) {
    return { attribute, operator, quote, value, sensitivity: null }
  }

  // Sensitivity, e.g.: [data-foo=value i]
  //                                    ^
  let sensitivity: Sensitivity
  switch (input.charCodeAt(i)) {
    case LOWER_I:
    case UPPER_I:
      sensitivity = 'i'
      break

    case LOWER_S:
    case UPPER_S:
      sensitivity = 's'
      break

    default:
      return null // Invalid sensitivity
  }
  i++

  // Skip whitespace, e.g.: [data-foo=value i   ]
  //                                         ^^^
  while (isAsciiWhitespace(input.charCodeAt(i))) i++

  // We must be at the end now, if not, then there is an additional character
  // after the sensitivity which is invalid, e.g.: [data-foo=value iX]
  //                                                                ^
  if (i !== end) {
    return null
  }

  // Fully done
  return { attribute, operator, quote, value, sensitivity }
}

/**
 * Whether `code` is a space, tab, line feed or carriage return. `NaN` (what
 * `charCodeAt` returns for an out-of-range index) is not whitespace, so the
 * skip loops in `parse` stop on their own past the end of the input.
 */
function isAsciiWhitespace(code: number): boolean {
  switch (code) {
    case SPACE:
    case TAB:
    case LINE_BREAK:
    case CARRIAGE_RETURN:
      return true

    default:
      return false
  }
}

0 commit comments

Comments
 (0)