Skip to content

Commit ef5cdcd

Browse files
committed
Merge branch 'html-api/add-css-selector-parser' into html-api/work-on-select-api
2 parents b67c76e + b7e032e commit ef5cdcd

17 files changed

+2341
-0
lines changed
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
<?php
2+
/**
3+
* HTML API: WP_CSS_Attribute_Selector class
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
* @since TBD
8+
*/
9+
10+
/**
11+
* CSS attribute selector.
12+
*
13+
* This class implements a CSS attribute selector and is used to test for matching HTML tags
14+
* in a {@see WP_HTML_Tag_Processor}.
15+
*
16+
* @since TBD
17+
*
18+
* @access private
19+
*/
20+
final class WP_CSS_Attribute_Selector implements WP_CSS_HTML_Tag_Processor_Matcher {
	/**
	 * [att=val]
	 * Represents an element with the att attribute whose value is exactly "val".
	 */
	const MATCH_EXACT = 'MATCH_EXACT';

	/**
	 * [attr~=value]
	 * Represents elements with an attribute name of attr whose value is a
	 * whitespace-separated list of words, one of which is exactly value.
	 */
	const MATCH_ONE_OF_EXACT = 'MATCH_ONE_OF_EXACT';

	/**
	 * [attr|=value]
	 * Represents elements with an attribute name of attr whose value can be exactly value or
	 * can begin with value immediately followed by a hyphen, - (U+002D). It is often used for
	 * language subcode matches.
	 */
	const MATCH_EXACT_OR_EXACT_WITH_HYPHEN = 'MATCH_EXACT_OR_EXACT_WITH_HYPHEN';

	/**
	 * [attr^=value]
	 * Represents elements with an attribute name of attr whose value is prefixed (preceded)
	 * by value. An empty value never matches.
	 */
	const MATCH_PREFIXED_BY = 'MATCH_PREFIXED_BY';

	/**
	 * [attr$=value]
	 * Represents elements with an attribute name of attr whose value is suffixed (followed)
	 * by value. An empty value never matches.
	 */
	const MATCH_SUFFIXED_BY = 'MATCH_SUFFIXED_BY';

	/**
	 * [attr*=value]
	 * Represents elements with an attribute name of attr whose value contains at least one
	 * occurrence of value within the string. An empty value never matches.
	 */
	const MATCH_CONTAINS = 'MATCH_CONTAINS';

	/**
	 * Modifier for case sensitive matching
	 * [attr=value s]
	 */
	const MODIFIER_CASE_SENSITIVE = 'case-sensitive';

	/**
	 * Modifier for case insensitive matching
	 * [attr=value i]
	 */
	const MODIFIER_CASE_INSENSITIVE = 'case-insensitive';

	/**
	 * The attribute name.
	 *
	 * @var string
	 * @readonly
	 */
	public $name;

	/**
	 * The attribute matcher.
	 *
	 * @var null|self::MATCH_*
	 * @readonly
	 */
	public $matcher;

	/**
	 * The attribute value.
	 *
	 * @var string|null
	 * @readonly
	 */
	public $value;

	/**
	 * The attribute modifier.
	 *
	 * @var null|self::MODIFIER_*
	 * @readonly
	 */
	public $modifier;

	/**
	 * Constructor.
	 *
	 * @param string                $name     The attribute name to test.
	 * @param null|self::MATCH_*    $matcher  How the value is compared, or null for a presence-only test.
	 * @param null|string           $value    The value to compare against, or null for a presence-only test.
	 * @param null|self::MODIFIER_* $modifier Optional case-sensitivity modifier.
	 */
	public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) {
		$this->name     = $name;
		$this->matcher  = $matcher;
		$this->value    = $value;
		$this->modifier = $modifier;
	}

	/**
	 * Determines if the processor's current position matches the selector.
	 *
	 * Comparison is case sensitive unless the modifier is
	 * {@see WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE}.
	 *
	 * @param WP_HTML_Tag_Processor $processor
	 * @return bool True if the processor's current position matches the selector.
	 */
	public function matches( WP_HTML_Tag_Processor $processor ): bool {
		$att_value = $processor->get_attribute( $this->name );
		if ( null === $att_value ) {
			return false;
		}

		// Without a value to compare, the selector only tests for the attribute's presence.
		if ( null === $this->value ) {
			return true;
		}

		/*
		 * A `true` attribute value is compared as an empty string.
		 * NOTE(review): presumably `true` denotes a valueless attribute; confirm against
		 * WP_HTML_Tag_Processor::get_attribute().
		 */
		if ( true === $att_value ) {
			$att_value = '';
		}

		$case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier;

		switch ( $this->matcher ) {
			case self::MATCH_EXACT:
				return $this->values_are_equal( $att_value, $this->value, $case_insensitive );

			case self::MATCH_ONE_OF_EXACT:
				foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) {
					if ( $this->values_are_equal( $val, $this->value, $case_insensitive ) ) {
						return true;
					}
				}
				return false;

			case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN:
				// Attempt the full match first.
				if ( $this->values_are_equal( $att_value, $this->value, $case_insensitive ) ) {
					return true;
				}

				// Partial match: the attribute must be long enough to hold "{value}-".
				$starts_with = "{$this->value}-";
				if ( strlen( $att_value ) < strlen( $starts_with ) ) {
					return false;
				}

				return 0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive );

			case self::MATCH_PREFIXED_BY:
				// An empty value represents nothing; a shorter attribute value can never match.
				if ( '' === $this->value || strlen( $att_value ) < strlen( $this->value ) ) {
					return false;
				}

				return 0 === substr_compare( $att_value, $this->value, 0, strlen( $this->value ), $case_insensitive );

			case self::MATCH_SUFFIXED_BY:
				// An empty value represents nothing; a shorter attribute value can never match.
				if ( '' === $this->value || strlen( $att_value ) < strlen( $this->value ) ) {
					return false;
				}

				return 0 === substr_compare( $att_value, $this->value, -strlen( $this->value ), null, $case_insensitive );

			case self::MATCH_CONTAINS:
				// An empty value represents nothing.
				if ( '' === $this->value ) {
					return false;
				}

				return false !== (
					$case_insensitive
						? stripos( $att_value, $this->value )
						: strpos( $att_value, $this->value )
				);

			default:
				// A value without a recognized matcher cannot be compared; treat it as no match.
				return false;
		}
	}

	/**
	 * Compares two strings for equality, optionally ignoring case.
	 *
	 * @param string $a                First string.
	 * @param string $b                Second string.
	 * @param bool   $case_insensitive Whether to compare with strcasecmp() instead of `===`.
	 * @return bool True if the strings are considered equal.
	 */
	private function values_are_equal( string $a, string $b, bool $case_insensitive ): bool {
		return $case_insensitive
			? 0 === strcasecmp( $a, $b )
			: $a === $b;
	}

	/**
	 * Splits a string into a list of whitespace delimited values.
	 *
	 * Whitespace is any of space, tab, carriage return, line feed, or form feed.
	 * Empty strings are never yielded.
	 *
	 * This is useful for the {@see WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT} matcher.
	 *
	 * @param string $input
	 *
	 * @return Generator<string>
	 */
	private function whitespace_delimited_list( string $input ): Generator {
		$end = strlen( $input );

		// Start by skipping whitespace.
		$offset = strspn( $input, " \t\r\n\f" );

		while ( $offset < $end ) {
			// Find the byte length until the next boundary.
			$length = strcspn( $input, " \t\r\n\f", $offset );
			$value  = substr( $input, $offset, $length );

			// Move past trailing whitespace.
			$offset += $length + strspn( $input, " \t\r\n\f", $offset + $length );

			yield $value;
		}
	}
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
/**
 * Matcher that tests a tag processor's current position for a class name.
 */
final class WP_CSS_Class_Selector implements WP_CSS_HTML_Tag_Processor_Matcher {
	/**
	 * The class name passed to the processor's `has_class()` check.
	 *
	 * @var string
	 */
	public $ident;

	/**
	 * Constructor.
	 *
	 * @param string $ident The class name to test for.
	 */
	public function __construct( string $ident ) {
		$this->ident = $ident;
	}

	/**
	 * Determines if the processor's current position has the class.
	 *
	 * @param WP_HTML_Tag_Processor $processor
	 * @return bool The result of `has_class()` for the stored class name, cast to a boolean.
	 */
	public function matches( WP_HTML_Tag_Processor $processor ): bool {
		$has_class = $processor->has_class( $this->ident );

		return (bool) $has_class;
	}
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
<?php
2+
/**
3+
* HTML API: WP_CSS_Complex_Selector_List class
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
* @since TBD
8+
*/
9+
10+
/**
11+
* Core class used by the {@see WP_HTML_Processor} to parse and match CSS selectors.
12+
*
13+
* This class is designed for internal use by the HTML processor.
14+
*
15+
* For usage, see {@see WP_HTML_Processor::select()} or {@see WP_HTML_Processor::select_all()}.
16+
*
17+
* This class is instantiated via the {@see WP_CSS_Complex_Selector_List::from_selectors()} method.
18+
* It takes a CSS selector string and returns an instance of itself or `null` if the selector
19+
* is invalid or unsupported.
20+
*
21+
* A subset of the CSS selector grammar is supported. The grammar is defined in the Selectors
22+
* specification, which is available at {@link https://www.w3.org/TR/selectors/#grammar}.
23+
*
24+
* This class is roughly analogous to the <selector-list> in the grammar. See {@see WP_CSS_Compound_Selector_List} for more details on the grammar.
25+
*
26+
* This class supports the same selector syntax as {@see WP_CSS_Compound_Selector_List} as well as:
27+
* - The following combinators:
28+
* - Next sibling (`el + el`)
29+
* - Subsequent sibling (`el ~ el`)
30+
*
31+
* @since TBD
32+
*
33+
* @access private
34+
*/
35+
class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Processor_Matcher {
	/**
	 * Builds a selector list from a comma-separated CSS selector string.
	 *
	 * The input is normalized first. An input that normalizes to an empty string,
	 * a complex selector that fails to parse, or anything other than a `,` between
	 * selectors makes the whole input invalid.
	 *
	 * @since TBD
	 *
	 * @param string $input CSS selectors.
	 * @return self|null A selector list, or `null` if the input is invalid or unsupported.
	 */
	public static function from_selectors( string $input ) {
		$css = self::normalize_selector_input( $input );
		if ( '' === $css ) {
			return null;
		}

		$css_length = strlen( $css );
		$at         = 0;
		$parsed     = array();

		while ( true ) {
			$complex_selector = self::parse_complex_selector( $css, $at );
			if ( null === $complex_selector ) {
				return null;
			}
			$parsed[] = $complex_selector;

			self::parse_whitespace( $css, $at );
			if ( $at >= $css_length ) {
				break;
			}

			// Anything left after a selector must begin with the `,` list delimiter.
			if ( ',' !== $css[ $at ] ) {
				return null;
			}
			++$at;
			self::parse_whitespace( $css, $at );
		}

		return new self( $parsed );
	}

	/*
	 * ------------------------------
	 * Selector parsing functionality
	 * ------------------------------
	 */

	/**
	 * Parses a complex selector.
	 *
	 * > <complex-selector> = [ <type-selector> <combinator>? ]* <compound-selector>
	 *
	 * The result is built from an alternating list of compound selectors and
	 * combinators. Whitespace between two compound selectors is treated as the
	 * descendant combinator.
	 *
	 * @param string $input  The selector string being parsed.
	 * @param int    $offset Byte offset to start parsing at. Advanced past the parsed
	 *                       selector on success; left unchanged when `null` is returned.
	 * @return WP_CSS_Complex_Selector|null The parsed selector, or `null` on a parse
	 *                                      error or unsupported selector.
	 */
	final protected static function parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector {
		$input_length = strlen( $input );
		if ( $offset >= $input_length ) {
			return null;
		}

		// Work on a copy so the caller's offset only moves on success.
		$cursor   = $offset;
		$compound = self::parse_compound_selector( $input, $cursor );
		if ( null === $compound ) {
			return null;
		}

		$parts                 = array( $compound );
		$previous_has_subclass = null !== $compound->subclass_selectors;
		$saw_whitespace        = self::parse_whitespace( $input, $cursor );

		while ( $cursor < $input_length ) {
			$char                   = $input[ $cursor ];
			$is_explicit_combinator = in_array(
				$char,
				array(
					WP_CSS_Complex_Selector::COMBINATOR_CHILD,
					WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING,
					WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING,
				),
				true
			);

			if ( $is_explicit_combinator ) {
				$combinator = $char;
				++$cursor;
				self::parse_whitespace( $input, $cursor );

				// An explicit combinator must be followed by a selector, otherwise it is a parse error.
				$compound = self::parse_compound_selector( $input, $cursor );
				if ( null === $compound ) {
					return null;
				}
			} elseif ( $saw_whitespace ) {
				/*
				 * Whitespace alone is ambiguous: it is a descendant combinator only
				 * when another compound selector follows it.
				 */
				$compound = self::parse_compound_selector( $input, $cursor );
				if ( null === $compound ) {
					break;
				}
				$combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT;
			} else {
				break;
			}

			/*
			 * Subclass selectors are only supported in the final position:
			 * - `div > .className` is valid
			 * - `.className > div` is not
			 */
			if ( $previous_has_subclass ) {
				return null;
			}
			$previous_has_subclass = null !== $compound->subclass_selectors;

			array_push( $parts, $combinator, $compound );

			$saw_whitespace = self::parse_whitespace( $input, $cursor );
		}

		$offset = $cursor;
		return new WP_CSS_Complex_Selector( $parts );
	}
}

0 commit comments

Comments
 (0)