Skip to content

Commit c51b981

Browse files
committed
Merge branch 'html-api/add-css-selector-parser' into html-api/work-on-select-api
2 parents 9558c1f + e5e4c7e commit c51b981

17 files changed

+2312
-0
lines changed
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
<?php
2+
3+
final class WP_CSS_Attribute_Selector implements WP_CSS_HTML_Tag_Processor_Matcher {
	/**
	 * Bytes treated as separators when an attribute value is split into a
	 * whitespace-delimited list: space, tab, carriage return, line feed, form feed.
	 */
	const WHITESPACE_CHARACTERS = " \t\r\n\f";

	/**
	 * Checks whether the tag currently visited by the processor satisfies this
	 * attribute selector.
	 *
	 * The attribute named by `$this->name` is read from the processor:
	 *  - a `null` result means the selector does not match;
	 *  - when the selector carries no value (`[att]`), any non-null result matches;
	 *  - a `true` result is compared as the empty string.
	 *
	 * Comparison is case-insensitive only when the modifier is
	 * {@see self::MODIFIER_CASE_INSENSITIVE}; it is byte-wise otherwise.
	 *
	 * Substring matchers (`^=`, `$=`, `*=`) never match when the selector value is
	 * the empty string, as required by the Selectors specification. An unknown or
	 * missing matcher combined with a non-null value matches nothing.
	 *
	 * @param WP_HTML_Tag_Processor $processor Processor positioned on the tag to test.
	 * @return bool Whether the tag matches this selector.
	 */
	public function matches( WP_HTML_Tag_Processor $processor ): bool {
		$att_value = $processor->get_attribute( $this->name );
		if ( null === $att_value ) {
			return false;
		}

		// `[att]`: presence of the attribute is all that is required.
		if ( null === $this->value ) {
			return true;
		}

		// A `true` attribute value is compared as an empty string.
		if ( true === $att_value ) {
			$att_value = '';
		}

		$case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier;
		$att_length       = strlen( $att_value );
		$value_length     = strlen( $this->value );

		switch ( $this->matcher ) {
			case self::MATCH_EXACT:
				return self::values_equal( $att_value, $this->value, $case_insensitive );

			case self::MATCH_ONE_OF_EXACT:
				foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) {
					if ( self::values_equal( $val, $this->value, $case_insensitive ) ) {
						return true;
					}
				}
				return false;

			case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN:
				// Attempt the full match first.
				if ( self::values_equal( $att_value, $this->value, $case_insensitive ) ) {
					return true;
				}

				// Otherwise the attribute must begin with the value followed by a hyphen.
				$starts_with        = "{$this->value}-";
				$starts_with_length = $value_length + 1;
				if ( $att_length < $starts_with_length ) {
					return false;
				}

				return 0 === substr_compare( $att_value, $starts_with, 0, $starts_with_length, $case_insensitive );

			case self::MATCH_PREFIXED_BY:
				// An empty selector value represents nothing; a longer value cannot be a prefix.
				if ( 0 === $value_length || $att_length < $value_length ) {
					return false;
				}

				return 0 === substr_compare( $att_value, $this->value, 0, $value_length, $case_insensitive );

			case self::MATCH_SUFFIXED_BY:
				// An empty selector value represents nothing; a longer value cannot be a suffix.
				if ( 0 === $value_length || $att_length < $value_length ) {
					return false;
				}

				return 0 === substr_compare( $att_value, $this->value, -$value_length, $value_length, $case_insensitive );

			case self::MATCH_CONTAINS:
				// An empty selector value represents nothing (and avoids an empty needle).
				if ( 0 === $value_length ) {
					return false;
				}

				return false !== (
					$case_insensitive
						? stripos( $att_value, $this->value )
						: strpos( $att_value, $this->value )
				);

			default:
				// A value without a recognized matcher cannot match anything.
				return false;
		}
	}

	/**
	 * Compares two strings for equality, optionally ignoring case.
	 *
	 * @param string $a                First string.
	 * @param string $b                Second string.
	 * @param bool   $case_insensitive Whether to compare with `strcasecmp()` instead of `===`.
	 * @return bool Whether the strings are considered equal.
	 */
	private static function values_equal( string $a, string $b, bool $case_insensitive ): bool {
		return $case_insensitive
			? 0 === strcasecmp( $a, $b )
			: $a === $b;
	}

	/**
	 * Lazily yields each item of a whitespace-delimited list.
	 *
	 * Leading, trailing, and repeated separators are skipped, so no empty
	 * string is ever yielded.
	 *
	 * @param string $input Raw list, e.g. an attribute value.
	 *
	 * @return Generator<string>
	 */
	private function whitespace_delimited_list( string $input ): Generator {
		$end = strlen( $input );

		// Start by skipping whitespace.
		$offset = strspn( $input, self::WHITESPACE_CHARACTERS );

		while ( $offset < $end ) {
			// Find the byte length until the next boundary.
			$length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset );
			$value  = substr( $input, $offset, $length );

			// Move past the item and any whitespace that follows it.
			$offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length );

			yield $value;
		}
	}

	/**
	 * [att=val]
	 * Represents an element with the att attribute whose value is exactly "val".
	 */
	const MATCH_EXACT = 'MATCH_EXACT';

	/**
	 * [attr~=value]
	 * Represents elements with an attribute name of attr whose value is a
	 * whitespace-separated list of words, one of which is exactly value.
	 */
	const MATCH_ONE_OF_EXACT = 'MATCH_ONE_OF_EXACT';

	/**
	 * [attr|=value]
	 * Represents elements with an attribute name of attr whose value can be exactly value or
	 * can begin with value immediately followed by a hyphen, - (U+002D). It is often used for
	 * language subcode matches.
	 */
	const MATCH_EXACT_OR_EXACT_WITH_HYPHEN = 'MATCH_EXACT_OR_EXACT_WITH_HYPHEN';

	/**
	 * [attr^=value]
	 * Represents elements with an attribute name of attr whose value is prefixed (preceded)
	 * by value. Never matches when value is the empty string.
	 */
	const MATCH_PREFIXED_BY = 'MATCH_PREFIXED_BY';

	/**
	 * [attr$=value]
	 * Represents elements with an attribute name of attr whose value is suffixed (followed)
	 * by value. Never matches when value is the empty string.
	 */
	const MATCH_SUFFIXED_BY = 'MATCH_SUFFIXED_BY';

	/**
	 * [attr*=value]
	 * Represents elements with an attribute name of attr whose value contains at least one
	 * occurrence of value within the string. Never matches when value is the empty string.
	 */
	const MATCH_CONTAINS = 'MATCH_CONTAINS';

	/**
	 * Modifier for case sensitive matching
	 * [attr=value s]
	 */
	const MODIFIER_CASE_SENSITIVE = 'case-sensitive';

	/**
	 * Modifier for case insensitive matching
	 * [attr=value i]
	 */
	const MODIFIER_CASE_INSENSITIVE = 'case-insensitive';

	/**
	 * The attribute name.
	 *
	 * @var string
	 */
	public $name;

	/**
	 * The attribute matcher.
	 *
	 * @var null|self::MATCH_*
	 */
	public $matcher;

	/**
	 * The attribute value.
	 *
	 * @var string|null
	 */
	public $value;

	/**
	 * The attribute modifier.
	 *
	 * @var null|self::MODIFIER_*
	 */
	public $modifier;

	/**
	 * Stores the parts of an attribute selector.
	 *
	 * @param string                $name     Attribute name.
	 * @param null|self::MATCH_*    $matcher  Matcher, or null for a presence-only selector.
	 * @param null|string           $value    Value to compare against, or null for presence-only.
	 * @param null|self::MODIFIER_* $modifier Case-sensitivity modifier, if any.
	 */
	public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) {
		$this->name     = $name;
		$this->matcher  = $matcher;
		$this->value    = $value;
		$this->modifier = $modifier;
	}
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
final class WP_CSS_Class_Selector implements WP_CSS_HTML_Tag_Processor_Matcher {
	/**
	 * The class name this selector looks for.
	 *
	 * @var string
	 */
	public $ident;

	/**
	 * Stores the class name to test tags against.
	 *
	 * @param string $ident Class name, kept exactly as given.
	 */
	public function __construct( string $ident ) {
		$this->ident = $ident;
	}

	/**
	 * Reports whether the processor's `has_class()` check succeeds for the
	 * stored class name.
	 *
	 * The result of `has_class()` is cast to a boolean, so any falsy result
	 * is reported as `false`.
	 *
	 * @param WP_HTML_Tag_Processor $processor Processor to query.
	 * @return bool Whether the class selector matches.
	 */
	public function matches( WP_HTML_Tag_Processor $processor ): bool {
		$has_class = $processor->has_class( $this->ident );

		return (bool) $has_class;
	}
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
<?php
2+
/**
3+
* HTML API: WP_CSS_Complex_Selector_List class
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
* @since TBD
8+
*/
9+
10+
/**
11+
* Core class used by the {@see WP_HTML_Processor} to parse and match CSS selectors.
12+
*
13+
* This class is designed for internal use by the HTML processor.
14+
*
15+
* For usage, see {@see WP_HTML_Processor::select()} or {@see WP_HTML_Processor::select_all()}.
16+
*
17+
* This class is instantiated via the {@see WP_CSS_Complex_Selector_List::from_selectors()} method.
18+
* It takes a CSS selector string and returns an instance of itself or `null` if the selector
19+
* is invalid or unsupported.
20+
*
21+
* A subset of the CSS selector grammar is supported. The grammar is defined in the Selectors
22+
* specification, which is available at {@link https://www.w3.org/TR/selectors/#grammar}.
23+
*
24+
* This class is roughly analogous to the <selector-list> in the grammar. See {@see WP_CSS_Compound_Selector_List} for more details on the grammar.
25+
*
26+
* This class supports the same selector syntax as {@see WP_CSS_Compound_Selector_List} as well as:
27+
* - The following combinators:
28+
* - Next sibling (`el + el`)
29+
* - Subsequent sibling (`el ~ el`)
30+
*
31+
* @since TBD
32+
*
33+
* @access private
34+
*/
35+
class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Processor_Matcher {
	/**
	 * Builds a selector list from a comma-separated string of complex selectors.
	 *
	 * The input is normalized first. Parsing then alternates between one complex
	 * selector and one `,` delimiter until the input is exhausted. Any failure
	 * (empty input, an unparsable selector, or an unexpected character where a
	 * delimiter is required) yields `null`.
	 *
	 * @since TBD
	 *
	 * @param string $input CSS selectors.
	 * @return self|null The parsed selector list, or null when the input is invalid or unsupported.
	 */
	public static function from_selectors( string $input ) {
		$normalized = self::normalize_selector_input( $input );
		if ( '' === $normalized ) {
			return null;
		}

		$length = strlen( $normalized );
		$cursor = 0;
		$list   = array();

		while ( true ) {
			$complex = self::parse_complex_selector( $normalized, $cursor );
			if ( null === $complex ) {
				return null;
			}
			$list[] = $complex;
			self::parse_whitespace( $normalized, $cursor );

			// Nothing left to read: the list is complete.
			if ( $cursor >= $length ) {
				break;
			}

			// Anything other than a `,` delimiter at this point is invalid.
			if ( ',' !== $normalized[ $cursor ] ) {
				return null;
			}
			++$cursor;
			self::parse_whitespace( $normalized, $cursor );
		}

		return new self( $list );
	}

	/*
	 * ------------------------------
	 * Selector parsing functionality
	 * ------------------------------
	 */

	/**
	 * Parses one complex selector starting at the given offset.
	 *
	 * > <complex-selector> = [ <type-selector> <combinator>? ]* <compound-selector>
	 *
	 * The resulting selector is built from an array that alternates compound
	 * selectors and combinators, in source order. Whitespace between two compound
	 * selectors is treated as the descendant combinator; whitespace followed by
	 * anything that is not a compound selector ends the complex selector.
	 *
	 * A compound selector with subclass selectors is only accepted in the final
	 * position: `div > .className` parses, `.className > div` does not.
	 *
	 * @param string $input  Selector text.
	 * @param int    $offset Byte offset to start at. Updated to the position after the
	 *                       parsed selector on success; left untouched when null is returned.
	 * @return WP_CSS_Complex_Selector|null The parsed selector, or null on failure.
	 */
	final protected static function parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector {
		$length = strlen( $input );
		if ( $offset >= $length ) {
			return null;
		}

		// Work on a copy so the caller's offset only moves on success.
		$cursor   = $offset;
		$compound = self::parse_compound_selector( $input, $cursor );
		if ( null === $compound ) {
			return null;
		}

		$parts                 = array( $compound );
		$previous_has_subclass = null !== $compound->subclass_selectors;
		$explicit_combinators  = array(
			WP_CSS_Complex_Selector::COMBINATOR_CHILD,
			WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING,
			WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING,
		);

		$saw_whitespace = self::parse_whitespace( $input, $cursor );

		while ( $cursor < $length ) {
			$char        = $input[ $cursor ];
			$is_explicit = in_array( $char, $explicit_combinators, true );

			// Neither a combinator character nor whitespace: the selector ends here.
			if ( ! $is_explicit && ! $saw_whitespace ) {
				break;
			}

			if ( $is_explicit ) {
				$combinator = $char;
				++$cursor;
				self::parse_whitespace( $input, $cursor );

				// An explicit combinator must be followed by a compound selector.
				$compound = self::parse_compound_selector( $input, $cursor );
				if ( null === $compound ) {
					return null;
				}
			} else {
				/*
				 * Whitespace alone is ambiguous: it is a descendant combinator
				 * only if a compound selector follows it.
				 */
				$compound = self::parse_compound_selector( $input, $cursor );
				if ( null === $compound ) {
					break;
				}
				$combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT;
			}

			// Subclass selectors are unsupported anywhere but the final compound selector.
			if ( $previous_has_subclass ) {
				return null;
			}
			$previous_has_subclass = null !== $compound->subclass_selectors;

			$parts[] = $combinator;
			$parts[] = $compound;

			$saw_whitespace = self::parse_whitespace( $input, $cursor );
		}

		$offset = $cursor;
		return new WP_CSS_Complex_Selector( $parts );
	}
}

0 commit comments

Comments
 (0)