Skip to content

Commit ced5b84

Browse files
committed
Optimize the processing of text between nodes
Instead of processing text one token at a time in the main loop, text is now consumed in batches up to the next special token: `<` and `&`, which have special handling in the main loop, and NUL characters, which must report a parse error.
1 parent 182f34d commit ced5b84

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

src/HTML5/Parser/Tokenizer.php

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,15 +163,21 @@ protected function consumeData()
163163
break;
164164

165165
default:
166-
if ('<' !== $tok && '&' !== $tok) {
167-
// NULL character
168-
if ("\00" === $tok) {
169-
$this->parseError('Received null character.');
170-
}
166+
if ('<' === $tok || '&' === $tok) {
167+
break;
168+
}
169+
170+
// NULL character
171+
if ("\00" === $tok) {
172+
$this->parseError('Received null character.');
171173

172174
$this->text .= $tok;
173175
$this->scanner->consume();
176+
177+
break;
174178
}
179+
180+
$this->text .= $this->scanner->charsUntil("<&\0");
175181
}
176182
}
177183

0 commit comments

Comments
 (0)