Skip to content

Commit a56b438

Browse files
committed
Optimize the handling of character references when consuming data
1 parent 9d32f2d commit a56b438

File tree

1 file changed

+8
-22
lines changed

1 file changed

+8
-22
lines changed

src/HTML5/Parser/Tokenizer.php

Lines changed: 8 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -121,11 +121,16 @@ public function setTextMode($textmode, $untilTag = null)
121121
*/
122122
protected function consumeData()
123123
{
124-
// Character reference
125-
$this->characterReference();
126-
127124
$tok = $this->scanner->current();
128125

126+
if ($tok === '&') {
127+
// Character reference
128+
$ref = $this->decodeCharacterReference();
129+
$this->buffer($ref);
130+
131+
$tok = $this->scanner->current();
132+
}
133+
129134
// Parse tag
130135
if ($tok === '<') {
131136
// Any buffered text data can go out now.
@@ -303,25 +308,6 @@ protected function eof($tok)
303308
return false;
304309
}
305310

306-
/**
307-
* Handle character references (aka entities).
308-
*
309-
* This version is specific to PCDATA, as it buffers data into the
310-
* text buffer. For a generic version, see decodeCharacterReference().
311-
*
312-
* HTML5 8.2.4.2
313-
*/
314-
protected function characterReference()
315-
{
316-
if ($this->scanner->current() !== '&') {
317-
return false;
318-
}
319-
320-
$ref = $this->decodeCharacterReference();
321-
$this->buffer($ref);
322-
return true;
323-
}
324-
325311
/**
326312
* Look for markup.
327313
*/

0 commit comments

Comments
 (0)