Skip to content

Commit 89e1a7a

Browse files
committed
Optimize the token comparison
Tokens are always a single character, so using strspn() to check whether one belongs to a fixed list is slower than comparing it directly against each candidate.
1 parent 54d066b commit 89e1a7a

File tree

1 file changed

+3
-5
lines changed

1 file changed

+3
-5
lines changed

src/HTML5/Parser/Tokenizer.php

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,6 @@ class Tokenizer
4747
const CONFORMANT_HTML = 'html';
4848
protected $mode = self::CONFORMANT_HTML;
4949

50-
const WHITE = "\t\n\f ";
51-
5250
/**
5351
* Create a new tokenizer.
5452
*
@@ -159,7 +157,7 @@ protected function consumeData()
159157
break;
160158

161159
default:
162-
if (!strspn($tok, '<&')) {
160+
if ('<' !== $tok && '&' !== $tok) {
163161
// NULL character
164162
if ("\00" === $tok) {
165163
$this->parseError('Received null character.');
@@ -193,7 +191,7 @@ protected function characterData()
193191
case Elements::TEXT_RCDATA:
194192
return $this->rcdata($tok);
195193
default:
196-
if (strspn($tok, '<&')) {
194+
if ('<' === $tok || '&' === $tok) {
197195
return false;
198196
}
199197

@@ -1093,7 +1091,7 @@ protected function decodeCharacterReference($inAttribute = false)
10931091

10941092
// These indicate not an entity. We return just
10951093
// the &.
1096-
if (1 === strspn($tok, static::WHITE . '&<')) {
1094+
if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
10971095
// $this->scanner->next();
10981096
return '&';
10991097
}

0 commit comments

Comments
 (0)