Optimize the processing of text between nodes

stof · stof · commit ced5b845b048 · 2018-11-27T14:16:40.000+01:00
Instead of processing text tokens one by one in the main loop, text is
now processed in batches up to the next special token (< and &, which have
special handling in the main loop, and NUL characters, which need to report
a parse error).
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
@@ -163,15 +163,21 @@ protected function consumeData()
                     break;
 
                 default:
-                    if ('<' !== $tok && '&' !== $tok) {
-                        // NULL character
-                        if ("\00" === $tok) {
-                            $this->parseError('Received null character.');
-                        }
+                    if ('<' === $tok || '&' === $tok) {
+                        break;
+                    }
+
+                    // NULL character
+                    if ("\00" === $tok) {
+                        $this->parseError('Received null character.');
 
                         $this->text .= $tok;
                         $this->scanner->consume();
+
+                        break;
                     }
+
+                    $this->text .= $this->scanner->charsUntil("<&\0");
             }
         }