diff --git a/src/base/encoding.rs b/src/base/encoding.rs
index 92c517d8..5c525e85 100644
--- a/src/base/encoding.rs
+++ b/src/base/encoding.rs
@@ -80,7 +80,8 @@ impl SharedEncoding {
     #[must_use]
     pub fn get(&self) -> &'static Encoding {
         let encoding = self.encoding.load(Ordering::Relaxed);
-        ALL_ENCODINGS[encoding]
+        // it will never be out of range, but get() avoids a panic branch
+        ALL_ENCODINGS.get(encoding).unwrap_or(&ALL_ENCODINGS[0])
     }
 
     pub fn set(&self, encoding: AsciiCompatibleEncoding) {
diff --git a/src/rewritable_units/mod.rs b/src/rewritable_units/mod.rs
index 069a3fda..f7687a3d 100644
--- a/src/rewritable_units/mod.rs
+++ b/src/rewritable_units/mod.rs
@@ -1,10 +1,14 @@
 use std::any::Any;
 
+pub(crate) use self::mutations::{Mutations, StringChunk};
+pub(crate) use self::text_decoder::TextDecoder;
+pub(crate) use self::text_encoder::{IncompleteUtf8Resync, TextEncoder};
+
 pub use self::document_end::*;
 pub use self::element::*;
 pub use self::mutations::{ContentType, StreamingHandler};
-pub(crate) use self::mutations::{Mutations, StringChunk};
-pub use self::text_encoder::{StreamingHandlerSink, Utf8Error};
+pub use self::streaming_sink::StreamingHandlerSink;
+pub use self::text_encoder::Utf8Error;
 pub use self::tokens::*;
 
 /// Data that can be attached to a rewritable unit by a user and shared between content handler
@@ -85,6 +89,8 @@ mod mutations;
 
 mod document_end;
 mod element;
+mod streaming_sink;
+mod text_decoder;
 mod text_encoder;
 mod tokens;
 
diff --git a/src/rewritable_units/streaming_sink.rs b/src/rewritable_units/streaming_sink.rs
new file mode 100644
index 00000000..0652eb54
--- /dev/null
+++ b/src/rewritable_units/streaming_sink.rs
@@ -0,0 +1,233 @@
+use super::{ContentType, IncompleteUtf8Resync, TextEncoder, Utf8Error};
+use crate::html::escape_body_text;
+use encoding_rs::{Encoding, UTF_8};
+
+/// Used to write chunks of text or markup in streaming mutation handlers.
+///
+/// Argument to [`StreamingHandler::write_all()`](crate::html_content::StreamingHandler::write_all).
+pub struct StreamingHandlerSink<'output_handler> {
+    incomplete_utf8: IncompleteUtf8Resync,
+    inner: StreamingHandlerSinkInner<'output_handler>,
+}
+
+struct StreamingHandlerSinkInner<'output_handler> {
+    non_utf8_encoder: Option<TextEncoder>,
+
+    /// ```compile_fail
+    /// use lol_html::html_content::StreamingHandlerSink;
+    /// struct IsSend<T: Send>(T);
+    /// let x: IsSend<StreamingHandlerSink<'static>>;
+    /// ```
+    ///
+    /// ```compile_fail
+    /// use lol_html::html_content::StreamingHandlerSink;
+    /// struct IsSync<T: Sync>(T);
+    /// let x: IsSync<StreamingHandlerSink<'static>>;
+    /// ```
+    output_handler: &'output_handler mut dyn FnMut(&[u8]),
+}
+
+impl<'output_handler> StreamingHandlerSink<'output_handler> {
+    #[inline(always)]
+    pub(crate) fn new(
+        encoding: &'static Encoding,
+        output_handler: &'output_handler mut dyn FnMut(&[u8]),
+    ) -> Self {
+        Self {
+            incomplete_utf8: IncompleteUtf8Resync::new(),
+            inner: StreamingHandlerSinkInner {
+                non_utf8_encoder: (encoding != UTF_8).then(|| TextEncoder::new(encoding)),
+                output_handler,
+            },
+        }
+    }
+
+    /// Writes the given UTF-8 string to the output, converting the encoding and [escaping](ContentType) if necessary.
+    ///
+    /// It may be called multiple times. The strings will be concatenated together.
+    #[inline]
+    pub fn write_str(&mut self, content: &str, content_type: ContentType) {
+        if self.incomplete_utf8.discard_incomplete() {
+            // too late to report the error to the caller of write_utf8_chunk
+            self.inner.write_html("\u{FFFD}");
+        }
+        self.inner.write_str(content, content_type);
+    }
+
+    #[inline]
+    pub(crate) fn output_handler(&mut self) -> &mut dyn FnMut(&[u8]) {
+        &mut self.inner.output_handler
+    }
+
+    /// Writes as much of the given UTF-8 fragment as possible, converting the encoding and [escaping](ContentType) if necessary.
+    ///
+    /// The `content` doesn't need to be a complete UTF-8 string, as long as consecutive calls to `write_utf8_chunk` create a valid UTF-8 string.
+    /// Any incomplete UTF-8 sequence at the end of the content is buffered and flushed as soon as it's completed.
+    ///
+    /// Other methods like `write_str` should not be called after a `write_utf8_chunk` call with an incomplete UTF-8 sequence.
+    #[inline]
+    pub fn write_utf8_chunk(
+        &mut self,
+        mut content: &[u8],
+        content_type: ContentType,
+    ) -> Result<(), Utf8Error> {
+        while !content.is_empty() {
+            let (valid_chunk, rest) = self.incomplete_utf8.utf8_bytes_to_slice(content)?;
+            content = rest;
+            if !valid_chunk.is_empty() {
+                self.inner.write_str(valid_chunk, content_type);
+            }
+        }
+        Ok(())
+    }
+}
+
+impl StreamingHandlerSinkInner<'_> {
+    #[inline]
+    pub(crate) fn write_str(&mut self, content: &str, content_type: ContentType) {
+        match content_type {
+            ContentType::Html => self.write_html(content),
+            ContentType::Text => self.write_body_text(content),
+        }
+    }
+
+    pub(crate) fn write_html(&mut self, html: &str) {
+        if let Some(encoder) = &mut self.non_utf8_encoder {
+            encoder.encode(html, self.output_handler);
+        } else if !html.is_empty() {
+            (self.output_handler)(html.as_bytes());
+        }
+    }
+
+    /// For text content, not attributes
+    pub(crate) fn write_body_text(&mut self, plaintext: &str) {
+        if let Some(encoder) = &mut self.non_utf8_encoder {
+            escape_body_text(plaintext, &mut |chunk| {
+                debug_assert!(!chunk.is_empty());
+                encoder.encode(chunk, self.output_handler);
+            });
+        } else {
+            escape_body_text(plaintext, &mut |chunk| {
+                debug_assert!(!chunk.is_empty());
+                (self.output_handler)(chunk.as_bytes());
+            });
+        }
+    }
+}
+
+#[test]
+fn utf8_fragments() {
+    let text = "🐈°文字化けしない ▀▄ ɯopuɐɹ ⓤⓝⓘⓒⓞⓓⓔ and ascii 🐳 sʇuıodǝpoɔ ✴";
+    for with_zero_writes in [false, true] {
+        for len in 1..9 {
+            let mut out = Vec::new();
+            let mut handler = |ch: &[u8]| out.extend_from_slice(ch);
+            let mut t = StreamingHandlerSink::new(UTF_8, &mut handler);
+            for (nth, chunk) in text.as_bytes().chunks(len).enumerate() {
+                let msg =
+                    format!("{len} at {nth} '{chunk:?}'; with_zero_writes={with_zero_writes}");
+                if with_zero_writes {
+                    t.write_utf8_chunk(b"", ContentType::Text).expect(&msg);
+                }
+                t.write_utf8_chunk(chunk, ContentType::Html).expect(&msg);
+            }
+            drop(t);
+            assert_eq!(String::from_utf8_lossy(&out), text, "{len}");
+        }
+    }
+}
+
+#[test]
+fn long_text() {
+    let mut written = 0;
+    let mut expected = 0;
+    let mut handler = |ch: &[u8]| {
+        assert!(
+            ch.iter().all(|&c| {
+                written += 1;
+                c == if 0 != written & 1 {
+                    177
+                } else {
+                    b'0' + ((written / 2 - 1) % 10) as u8
+                }
+            }),
+            "@{written} {ch:?}"
+        );
+    };
+    let mut t = StreamingHandlerSink::new(encoding_rs::ISO_8859_2, &mut handler);
+
+    let mut s = "ą0ą1ą2ą3ą4ą5ą6ą7ą8ą9".repeat(128);
+    let mut split_point = 1;
+    while s.len() <= 1 << 17 {
+        s.push_str(&s.clone());
+        expected += s.chars().count();
+        let (a, b) = s.as_bytes().split_at(split_point);
+        split_point += 13;
+        t.write_utf8_chunk(a, ContentType::Text).unwrap();
+        t.write_utf8_chunk(b, ContentType::Html).unwrap();
+    }
+    assert_eq!(expected, written);
+}
+
+#[test]
+fn invalid_utf8_fragments() {
+    #[rustfmt::skip]
+    let broken_utf8 = &[
+        &b"\x31\x32\x33\xED\xA0\x80\x31"[..], b"\x31\x32\x33\xEF\x80", b"\x31\x32\x33\xEF\x80\xF0\x3c",
+         b"\x37\x38\x39\xFE", b"\x37\x38\xFE", b"\x37\xFF", b"\x3c\x23\x24\xFE\x3C", b"\x3C\x23\xFE\x3C\x3C",
+         b"\x3C\x3D\xE0\x80\x3C", b"\x3C\x3D\xE0\x80\xAF\x3C", b"\x3C\x3D\xE0\x80\xE0\x80\x3C",
+         b"\x3C\x3D\xED\xA0\x80\x3C", b"\x3C\x3D\xF0\x80\x80\x3C", b"\x3C\x3D\xF0\x80\x80\x80\x3C",
+         b"\x3C\x3D\xF7\xBF\xBF\xBF\x3C", b"\x3C\x3D\xFF\x3C", b"\x7F", b"\x80", b"\x80\x3C",
+         b"\x80\x81\x82\x83\x84\x85\x86\x87", b"\x80\xBF", b"\x80\xBF\x80", b"\x80\xBF\x80\xBF",
+         b"\x80\xBF\x80\xBF\x80", b"\x80\xBF\x80\xBF\x80\xBF", b"\x81", b"\x81\x3C",
+         b"\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F", b"\x90\x91\x92\x93\x94\x95\x96\x97", b"\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F",
+         b"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7", b"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF", b"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7",
+         b"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", b"\xBF", b"\xC0", b"\xC0\x3C\xC1\x3C\xC2\x3C\xC3\x3C", b"\xC0\x80",
+         b"\xC0\xAF", b"\xC0\xAF\xE0\x80\xBF\xF0\x81\x82\x41", b"\xC1\x3C", b"\xC1\xBF", b"\xC1\xBF", b"\xC2\x00",
+         b"\xC2\x41\x42", b"\xC2\x7F", b"\xC2\xC0", b"\xC2\xFF", b"\xC4\x3C\xC5\x3C\xC6\x3C\xC7\x3C",
+         b"\xC8\x3C\xC9\x3C\xCA\x3C\xCB\x3C", b"\xCC\x3C\xCD\x3C\xCE\x3C\xCF\x3C", b"\xD0\x3C\xD1\x3C\xD2\x3C\xD3\x3C",
+         b"\xD4\x3C\xD5\x3C\xD6\x3C\xD7\x3C", b"\xD8\x3C\xD9\x3C\xDA\x3C\xDB\x3C", b"\xDC\x3C\xDD\x3C\xDE\x3C\xDF\x3C",
+         b"\xDF", b"\xDF\x00", b"\xDF\x7F", b"\xDF\xC0", b"\xDF\xFF", b"\xE0\x3C\xE1\x3C\xE2\x3C\xE3\x3C", b"\xE0\x80",
+         b"\xE0\x80\x00", b"\xE0\x80\x7F", b"\xE0\x80\x80", b"\xE0\x80\xAF", b"\xE0\x80\xC0", b"\xE0\x80\xFF",
+         b"\xE0\x81\xBF", b"\xE0\x9F\xBF", b"\xE1\x80\xE2\xF0\x91\x92\xF1\xBF\x41",
+         b"\xE4\x3C\xE5\x3C\xE6\x3C\xE7\x3C", b"\xE8\x3C\xE9\x3C\xEA\x3C\xEB\x3C", b"\xEC\x3C\xED\x3C\xEE\x3C\xEF\x3C",
+         b"\xED\x80\x00", b"\xED\x80\x7F", b"\xED\x80\xC0", b"\xED\x80\xFF", b"\xED\xA0\x80", b"\xED\xA0\x80\x35",
+         b"\xED\xA0\x80\xED\xB0\x80", b"\xED\xA0\x80\xED\xBF\xBF", b"\xED\xA0\x80\xED\xBF\xBF\xED\xAF\x41",
+         b"\xED\xAD\xBF", b"\xED\xAD\xBF\xED\xB0\x80", b"\xED\xAD\xBF\xED\xBF\xBF", b"\xED\xAE\x80",
+         b"\xED\xAE\x80\xED\xB0\x80", b"\xED\xAE\x80\xED\xBF\xBF", b"\xED\xAF\xBF", b"\xED\xAF\xBF\xED\xB0\x80",
+         b"\xED\xAF\xBF\xED\xBF\xBF", b"\xED\xB0\x80", b"\xED\xBE\x80", b"\xED\xBF\xBF", b"\xEF\xBF",
+         b"\xF0\x3C\xF1\x3C", b"\xF0\x80\x80", b"\xF0\x80\x80\x80", b"\xF0\x80\x80\xAF", b"\xF0\x80\x81\xBF",
+         b"\xF0\x8F\xBF\xBF", b"\xF0\x90\x80\x00", b"\xF0\x90\x80\x7F", b"\xF0\x90\x80\xC0", b"\xF0\x90\x80\xFF",
+         b"\xF1\x80\x80\x00", b"\xF1\x80\x80\x7F", b"\xF1\x80\x80\xC0", b"\xF1\x80\x80\xFF", b"\xF2\x3C\xF3\x3C",
+         b"\xF4\x3C\xF5\x3C", b"\xF4\x80\x80\x00", b"\xF4\x80\x80\x7F", b"\xF4\x80\x80\xC0", b"\xF4\x80\x80\xFF",
+         b"\xF4\x90\x80\x80", b"\xF4\x91\x92\x93\xFF\x41\x80\xBF\x42", b"\xF5\x3C", b"\xF6\x3C\xF7\x3C",
+         b"\xF7\xBF\xBF", b"\xF7\xBF\xBF\xBF", b"\xF7\xBF\xBF\xBF\xBF", b"\xF7\xBF\xBF\xBF\xBF\xBF",
+         b"\xF7\xBF\xBF\xBF\xBF\xBF\xBF", b"\xF8\x3C", b"\xF8\x80\x80\x80", b"\xF8\x80\x80\x80\xAF",
+         b"\xF8\x87\xBF\xBF\xBF", b"\xF8\x88\x80\x80\x80", b"\xF9\x3C", b"\xFA\x3C", b"\xFB\x3C", b"\xFB\xBF\xBF\xBF",
+         b"\xFC\x3C", b"\xFC\x80\x80\x80\x80", b"\xFC\x80\x80\x80\x80\xAF", b"\xFC\x84\x80\x80\x80\x80", b"\xFD\x3C",
+         b"\xFD\xBF\xBF\xBF\xBF", b"\xFE", b"\xFF", b"\xFF\x3C"
+    ];
+
+    for bad in broken_utf8 {
+        'next: for len in 1..bad.len() {
+            let mut handler = |ch: &[u8]| {
+                assert!(
+                    !std::str::from_utf8(ch).unwrap().contains('<'),
+                    "{ch:x?} of {bad:x?}"
+                );
+            };
+            let mut t = StreamingHandlerSink::new(UTF_8, &mut handler);
+            for chunk in bad.chunks(len) {
+                if t.write_utf8_chunk(chunk, ContentType::Text).is_err() {
+                    continue 'next;
+                }
+            }
+            // An ASCII write forces flush of an incomplete sequence
+            assert!(
+                t.write_utf8_chunk(b"<", ContentType::Text).is_err(),
+                "Shouldn't have allowed {bad:?} {}",
+                String::from_utf8_lossy(bad)
+            );
+        }
+    }
+}
diff --git a/src/rewritable_units/text_decoder.rs b/src/rewritable_units/text_decoder.rs
new file mode 100644
index 00000000..9fe325f4
--- /dev/null
+++ b/src/rewritable_units/text_decoder.rs
@@ -0,0 +1,125 @@
+use crate::base::SharedEncoding;
+use crate::rewriter::RewritingError;
+use encoding_rs::{CoderResult, Decoder, Encoding, UTF_8};
+
+pub(crate) struct TextDecoder {
+    encoding: SharedEncoding,
+    pending_text_streaming_decoder: Option<Decoder>,
+    text_buffer: String,
+}
+
+impl TextDecoder {
+    #[inline]
+    #[must_use]
+    pub fn new(encoding: SharedEncoding) -> Self {
+        Self {
+            encoding,
+            pending_text_streaming_decoder: None,
+            // TODO make adjustable
+            text_buffer: String::from_utf8(vec![0u8; 1024]).unwrap(),
+        }
+    }
+
+    #[inline]
+    pub fn flush_pending(
+        &mut self,
+        output_handler: &mut dyn FnMut(&str, bool, &'static Encoding) -> Result<(), RewritingError>,
+    ) -> Result<(), RewritingError> {
+        if self.pending_text_streaming_decoder.is_some() {
+            self.feed_text(&[], true, output_handler)?;
+        }
+        Ok(())
+    }
+
+    #[inline(never)]
+    pub fn feed_text(
+        &mut self,
+        mut raw_input: &[u8],
+        last_in_text_node: bool,
+        output_handler: &mut dyn FnMut(&str, bool, &'static Encoding) -> Result<(), RewritingError>,
+    ) -> Result<(), RewritingError> {
+        let encoding = self.encoding.get();
+
+        if let Some((utf8_text, rest)) = self.split_utf8_start(raw_input, encoding) {
+            raw_input = rest;
+            let really_last = last_in_text_node && rest.is_empty();
+
+            (output_handler)(utf8_text, really_last, encoding)?;
+
+            if really_last {
+                debug_assert!(self.pending_text_streaming_decoder.is_none());
+                return Ok(());
+            }
+        };
+
+        let decoder = self
+            .pending_text_streaming_decoder
+            .get_or_insert_with(|| encoding.new_decoder_without_bom_handling());
+
+        loop {
+            let buffer = self.text_buffer.as_mut_str();
+            let (status, read, written, ..) =
+                decoder.decode_to_str(raw_input, buffer, last_in_text_node);
+
+            let finished_decoding = status == CoderResult::InputEmpty;
+
+            if written > 0 || last_in_text_node {
+                // the last call to feed_text() may make multiple calls to output_handler,
+                // but only one call to output_handler can be *the* last one.
+                let really_last = last_in_text_node && finished_decoding;
+
+                (output_handler)(
+                    // this will always be in bounds, but unwrap_or_default optimizes better
+                    buffer.get(..written).unwrap_or_default(),
+                    really_last,
+                    encoding,
+                )?;
+            }
+
+            if finished_decoding {
+                if last_in_text_node {
+                    self.pending_text_streaming_decoder = None;
+                }
+                return Ok(());
+            }
+            raw_input = raw_input.get(read..).unwrap_or_default();
+        }
+    }
+
+    /// Fast path for UTF-8 or ASCII prefix
+    ///
+    /// Returns UTF-8 text to emit + remaining bytes, or `None` if the fast path is not available
+    #[inline]
+    fn split_utf8_start<'i>(
+        &self,
+        raw_input: &'i [u8],
+        encoding: &'static Encoding,
+    ) -> Option<(&'i str, &'i [u8])> {
+        // Can't use the fast path if the decoder may have buffered some bytes
+        if self.pending_text_streaming_decoder.is_some() {
+            return None;
+        }
+
+        let text_or_len = if encoding == UTF_8 {
+            std::str::from_utf8(raw_input).map_err(|err| err.valid_up_to())
+        } else {
+            debug_assert!(encoding.is_ascii_compatible());
+            Err(Encoding::ascii_valid_up_to(raw_input))
+        };
+
+        match text_or_len {
+            Ok(utf8_text) => Some((utf8_text, &[][..])),
+            Err(valid_up_to) => {
+                // The slow path emits chunks of up to 1KB (the size of `text_buffer`). Chunking
+                // shouldn't matter, but it is observable, and splitting off a shorter prefix here
+                // would make bugs worse for text handlers that assume they get a single chunk.
+                if valid_up_to != raw_input.len() && valid_up_to < self.text_buffer.len() {
+                    return None;
+                }
+
+                let (text, rest) = raw_input.split_at_checked(valid_up_to)?;
+                Some((std::str::from_utf8(text).ok()?, rest))
+            }
+        }
+    }
+}
diff --git a/src/rewritable_units/text_encoder.rs b/src/rewritable_units/text_encoder.rs
index 93daf086..de35c74a 100644
--- a/src/rewritable_units/text_encoder.rs
+++ b/src/rewritable_units/text_encoder.rs
@@ -1,5 +1,3 @@
-use super::ContentType;
-use crate::html::escape_body_text;
 use encoding_rs::{CoderResult, Encoder, Encoding, UTF_8};
 use thiserror::Error;
 
@@ -12,119 +10,6 @@ use thiserror::Error;
 #[error("Invalid UTF-8")]
 pub struct Utf8Error;
 
-/// Used to write chunks of text or markup in streaming mutation handlers.
-///
-/// Argument to [`StreamingHandler::write_all()`](crate::html_content::StreamingHandler::write_all).
-pub struct StreamingHandlerSink<'output_handler> {
-    incomplete_utf8: IncompleteUtf8Resync,
-    inner: StreamingHandlerSinkInner<'output_handler>,
-}
-
-struct StreamingHandlerSinkInner<'output_handler> {
-    non_utf8_encoder: Option<TextEncoder>,
-
-    /// ```compile_fail
-    /// use lol_html::html_content::StreamingHandlerSink;
-    /// struct IsSend<T: Send>(T);
-    /// let x: IsSend<StreamingHandlerSink<'static>>;
-    /// ```
-    ///
-    /// ```compile_fail
-    /// use lol_html::html_content::StreamingHandlerSink;
-    /// struct IsSync<T: Sync>(T);
-    /// let x: IsSync<StreamingHandlerSink<'static>>;
-    /// ```
-    output_handler: &'output_handler mut dyn FnMut(&[u8]),
-}
-
-impl<'output_handler> StreamingHandlerSink<'output_handler> {
-    #[inline(always)]
-    pub(crate) fn new(
-        encoding: &'static Encoding,
-        output_handler: &'output_handler mut dyn FnMut(&[u8]),
-    ) -> Self {
-        Self {
-            incomplete_utf8: IncompleteUtf8Resync::new(),
-            inner: StreamingHandlerSinkInner {
-                non_utf8_encoder: (encoding != UTF_8).then(|| TextEncoder::new(encoding)),
-                output_handler,
-            },
-        }
-    }
-
-    /// Writes the given UTF-8 string to the output, converting the encoding and [escaping](ContentType) if necessary.
-    ///
-    /// It may be called multiple times. The strings will be concatenated together.
-    #[inline]
-    pub fn write_str(&mut self, content: &str, content_type: ContentType) {
-        if self.incomplete_utf8.discard_incomplete() {
-            // too late to report the error to the caller of write_utf8_chunk
-            self.inner.write_html("\u{FFFD}");
-        }
-        self.inner.write_str(content, content_type);
-    }
-
-    #[inline]
-    pub(crate) fn output_handler(&mut self) -> &mut dyn FnMut(&[u8]) {
-        &mut self.inner.output_handler
-    }
-
-    /// Writes as much of the given UTF-8 fragment as possible, converting the encoding and [escaping](ContentType) if necessary.
-    ///
-    /// The `content` doesn't need to be a complete UTF-8 string, as long as consecutive calls to `write_utf8_bytes` create a valid UTF-8 string.
-    /// Any incomplete UTF-8 sequence at the end of the content is buffered and flushed as soon as it's completed.
-    ///
-    /// Other methods like `write_str_chunk` should not be called after a `write_utf8_bytes` call with an incomplete UTF-8 sequence.
-    #[inline]
-    pub fn write_utf8_chunk(
-        &mut self,
-        mut content: &[u8],
-        content_type: ContentType,
-    ) -> Result<(), Utf8Error> {
-        while !content.is_empty() {
-            let (valid_chunk, rest) = self.incomplete_utf8.utf8_bytes_to_slice(content)?;
-            content = rest;
-            if !valid_chunk.is_empty() {
-                self.inner.write_str(valid_chunk, content_type);
-            }
-        }
-        Ok(())
-    }
-}
-
-impl StreamingHandlerSinkInner<'_> {
-    #[inline]
-    pub(crate) fn write_str(&mut self, content: &str, content_type: ContentType) {
-        match content_type {
-            ContentType::Html => self.write_html(content),
-            ContentType::Text => self.write_body_text(content),
-        }
-    }
-
-    pub(crate) fn write_html(&mut self, html: &str) {
-        if let Some(encoder) = &mut self.non_utf8_encoder {
-            encoder.encode(html, self.output_handler);
-        } else if !html.is_empty() {
-            (self.output_handler)(html.as_bytes());
-        }
-    }
-
-    /// For text content, not attributes
-    pub(crate) fn write_body_text(&mut self, plaintext: &str) {
-        if let Some(encoder) = &mut self.non_utf8_encoder {
-            escape_body_text(plaintext, &mut |chunk| {
-                debug_assert!(!chunk.is_empty());
-                encoder.encode(chunk, self.output_handler);
-            });
-        } else {
-            escape_body_text(plaintext, &mut |chunk| {
-                debug_assert!(!chunk.is_empty());
-                (self.output_handler)(chunk.as_bytes());
-            });
-        }
-    }
-}
-
 /// Temporary buffer used for encoding_rs output
 enum Buffer {
     /// Stack buffer avoids heap allocation, and lets go back quickly to the ASCII fast path.
@@ -157,7 +42,7 @@ impl Buffer {
     }
 }
 
-struct TextEncoder {
+pub(crate) struct TextEncoder {
     encoder: Encoder,
     buffer: Buffer,
 }
@@ -177,7 +62,7 @@ impl TextEncoder {
     /// without heap allocations.
     /// It also avoids methods that have UB: <https://github.com/hsivonen/encoding_rs/issues/79>
     #[inline(never)]
-    fn encode(&mut self, mut content: &str, output_handler: &mut dyn FnMut(&[u8])) {
+    pub fn encode(&mut self, mut content: &str, output_handler: &mut dyn FnMut(&[u8])) {
         loop {
             // First, fast path for ASCII-only prefix
             debug_assert!(!self.encoder.has_pending_state()); // ASCII-compatible encodings are not supposed to have it
@@ -235,7 +120,7 @@ const fn utf8_width(b: u8) -> u8 {
 }
 
 /// Stitches together UTF-8 from byte writes that may split UTF-8 sequences into multiple fragments
-struct IncompleteUtf8Resync {
+pub(crate) struct IncompleteUtf8Resync {
     /// Buffers an incomplete UTF-8 sequence
     char_bytes: [u8; 4],
     /// Number of bytes in `bytes`
@@ -325,28 +210,6 @@ impl IncompleteUtf8Resync {
     }
 }
 
-#[test]
-fn utf8_fragments() {
-    let text = "🐈°文字化けしない ▀▄ ɯopuɐɹ ⓤⓝⓘⓒⓞⓓⓔ and ascii 🐳 sʇuıodǝpoɔ ✴";
-    for with_zero_writes in [false, true] {
-        for len in 1..9 {
-            let mut out = Vec::new();
-            let mut handler = |ch: &[u8]| out.extend_from_slice(ch);
-            let mut t = StreamingHandlerSink::new(UTF_8, &mut handler);
-            for (nth, chunk) in text.as_bytes().chunks(len).enumerate() {
-                let msg =
-                    format!("{len} at {nth} '{chunk:?}'; with_zero_writes={with_zero_writes}");
-                if with_zero_writes {
-                    t.write_utf8_chunk(b"", ContentType::Text).expect(&msg);
-                }
-                t.write_utf8_chunk(chunk, ContentType::Html).expect(&msg);
-            }
-            drop(t);
-            assert_eq!(String::from_utf8_lossy(&out), text, "{len}");
-        }
-    }
-}
-
 #[test]
 fn chars() {
     let boundaries = "🐈°文字化けしない"
@@ -362,98 +225,3 @@ fn chars() {
         .collect::<String>();
     assert_eq!("4...2.3..3..3..3..3..3..3..", boundaries);
 }
-
-#[test]
-fn long_text() {
-    let mut written = 0;
-    let mut expected = 0;
-    let mut handler = |ch: &[u8]| {
-        assert!(
-            ch.iter().all(|&c| {
-                written += 1;
-                c == if 0 != written & 1 {
-                    177
-                } else {
-                    b'0' + ((written / 2 - 1) % 10) as u8
-                }
-            }),
-            "@{written} {ch:?}"
-        );
-    };
-    let mut t = StreamingHandlerSink::new(encoding_rs::ISO_8859_2, &mut handler);
-
-    let mut s = "ą0ą1ą2ą3ą4ą5ą6ą7ą8ą9".repeat(128);
-    let mut split_point = 1;
-    while s.len() <= 1 << 17 {
-        s.push_str(&s.clone());
-        expected += s.chars().count();
-        let (a, b) = s.as_bytes().split_at(split_point);
-        split_point += 13;
-        t.write_utf8_chunk(a, ContentType::Text).unwrap();
-        t.write_utf8_chunk(b, ContentType::Html).unwrap();
-    }
-    assert_eq!(expected, written);
-}
-
-#[test]
-fn invalid_utf8_fragments() {
-    #[rustfmt::skip]
-    let broken_utf8 = &[
-        &b"\x31\x32\x33\xED\xA0\x80\x31"[..], b"\x31\x32\x33\xEF\x80", b"\x31\x32\x33\xEF\x80\xF0\x3c",
-         b"\x37\x38\x39\xFE", b"\x37\x38\xFE", b"\x37\xFF", b"\x3c\x23\x24\xFE\x3C", b"\x3C\x23\xFE\x3C\x3C",
-         b"\x3C\x3D\xE0\x80\x3C", b"\x3C\x3D\xE0\x80\xAF\x3C", b"\x3C\x3D\xE0\x80\xE0\x80\x3C",
-         b"\x3C\x3D\xED\xA0\x80\x3C", b"\x3C\x3D\xF0\x80\x80\x3C", b"\x3C\x3D\xF0\x80\x80\x80\x3C",
-         b"\x3C\x3D\xF7\xBF\xBF\xBF\x3C", b"\x3C\x3D\xFF\x3C", b"\x7F", b"\x80", b"\x80\x3C",
-         b"\x80\x81\x82\x83\x84\x85\x86\x87", b"\x80\xBF", b"\x80\xBF\x80", b"\x80\xBF\x80\xBF",
-         b"\x80\xBF\x80\xBF\x80", b"\x80\xBF\x80\xBF\x80\xBF", b"\x81", b"\x81\x3C",
-         b"\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F", b"\x90\x91\x92\x93\x94\x95\x96\x97", b"\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F",
-         b"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7", b"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF", b"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7",
-         b"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", b"\xBF", b"\xC0", b"\xC0\x3C\xC1\x3C\xC2\x3C\xC3\x3C", b"\xC0\x80",
-         b"\xC0\xAF", b"\xC0\xAF\xE0\x80\xBF\xF0\x81\x82\x41", b"\xC1\x3C", b"\xC1\xBF", b"\xC1\xBF", b"\xC2\x00",
-         b"\xC2\x41\x42", b"\xC2\x7F", b"\xC2\xC0", b"\xC2\xFF", b"\xC4\x3C\xC5\x3C\xC6\x3C\xC7\x3C",
-         b"\xC8\x3C\xC9\x3C\xCA\x3C\xCB\x3C", b"\xCC\x3C\xCD\x3C\xCE\x3C\xCF\x3C", b"\xD0\x3C\xD1\x3C\xD2\x3C\xD3\x3C",
-         b"\xD4\x3C\xD5\x3C\xD6\x3C\xD7\x3C", b"\xD8\x3C\xD9\x3C\xDA\x3C\xDB\x3C", b"\xDC\x3C\xDD\x3C\xDE\x3C\xDF\x3C",
-         b"\xDF", b"\xDF\x00", b"\xDF\x7F", b"\xDF\xC0", b"\xDF\xFF", b"\xE0\x3C\xE1\x3C\xE2\x3C\xE3\x3C", b"\xE0\x80",
-         b"\xE0\x80\x00", b"\xE0\x80\x7F", b"\xE0\x80\x80", b"\xE0\x80\xAF", b"\xE0\x80\xC0", b"\xE0\x80\xFF",
-         b"\xE0\x81\xBF", b"\xE0\x9F\xBF", b"\xE1\x80\xE2\xF0\x91\x92\xF1\xBF\x41",
-         b"\xE4\x3C\xE5\x3C\xE6\x3C\xE7\x3C", b"\xE8\x3C\xE9\x3C\xEA\x3C\xEB\x3C", b"\xEC\x3C\xED\x3C\xEE\x3C\xEF\x3C",
-         b"\xED\x80\x00", b"\xED\x80\x7F", b"\xED\x80\xC0", b"\xED\x80\xFF", b"\xED\xA0\x80", b"\xED\xA0\x80\x35",
-         b"\xED\xA0\x80\xED\xB0\x80", b"\xED\xA0\x80\xED\xBF\xBF", b"\xED\xA0\x80\xED\xBF\xBF\xED\xAF\x41",
-         b"\xED\xAD\xBF", b"\xED\xAD\xBF\xED\xB0\x80", b"\xED\xAD\xBF\xED\xBF\xBF", b"\xED\xAE\x80",
-         b"\xED\xAE\x80\xED\xB0\x80", b"\xED\xAE\x80\xED\xBF\xBF", b"\xED\xAF\xBF", b"\xED\xAF\xBF\xED\xB0\x80",
-         b"\xED\xAF\xBF\xED\xBF\xBF", b"\xED\xB0\x80", b"\xED\xBE\x80", b"\xED\xBF\xBF", b"\xEF\xBF",
-         b"\xF0\x3C\xF1\x3C", b"\xF0\x80\x80", b"\xF0\x80\x80\x80", b"\xF0\x80\x80\xAF", b"\xF0\x80\x81\xBF",
-         b"\xF0\x8F\xBF\xBF", b"\xF0\x90\x80\x00", b"\xF0\x90\x80\x7F", b"\xF0\x90\x80\xC0", b"\xF0\x90\x80\xFF",
-         b"\xF1\x80\x80\x00", b"\xF1\x80\x80\x7F", b"\xF1\x80\x80\xC0", b"\xF1\x80\x80\xFF", b"\xF2\x3C\xF3\x3C",
-         b"\xF4\x3C\xF5\x3C", b"\xF4\x80\x80\x00", b"\xF4\x80\x80\x7F", b"\xF4\x80\x80\xC0", b"\xF4\x80\x80\xFF",
-         b"\xF4\x90\x80\x80", b"\xF4\x91\x92\x93\xFF\x41\x80\xBF\x42", b"\xF5\x3C", b"\xF6\x3C\xF7\x3C",
-         b"\xF7\xBF\xBF", b"\xF7\xBF\xBF\xBF", b"\xF7\xBF\xBF\xBF\xBF", b"\xF7\xBF\xBF\xBF\xBF\xBF",
-         b"\xF7\xBF\xBF\xBF\xBF\xBF\xBF", b"\xF8\x3C", b"\xF8\x80\x80\x80", b"\xF8\x80\x80\x80\xAF",
-         b"\xF8\x87\xBF\xBF\xBF", b"\xF8\x88\x80\x80\x80", b"\xF9\x3C", b"\xFA\x3C", b"\xFB\x3C", b"\xFB\xBF\xBF\xBF",
-         b"\xFC\x3C", b"\xFC\x80\x80\x80\x80", b"\xFC\x80\x80\x80\x80\xAF", b"\xFC\x84\x80\x80\x80\x80", b"\xFD\x3C",
-         b"\xFD\xBF\xBF\xBF\xBF", b"\xFE", b"\xFF", b"\xFF\x3C"
-    ];
-
-    for bad in broken_utf8 {
-        'next: for len in 1..bad.len() {
-            let mut handler = |ch: &[u8]| {
-                assert!(
-                    !std::str::from_utf8(ch).unwrap().contains('<'),
-                    "{ch:x?} of {bad:x?}"
-                );
-            };
-            let mut t = StreamingHandlerSink::new(UTF_8, &mut handler);
-            for chunk in bad.chunks(len) {
-                if t.write_utf8_chunk(chunk, ContentType::Text).is_err() {
-                    continue 'next;
-                }
-            }
-            // An ASCII write forces flush of an incomplete sequence
-            assert!(
-                t.write_utf8_chunk(b"<", ContentType::Text).is_err(),
-                "Shouldn't have allowed {bad:?} {}",
-                String::from_utf8_lossy(bad)
-            );
-        }
-    }
-}
diff --git a/src/rewritable_units/tokens/capturer/mod.rs b/src/rewritable_units/tokens/capturer/mod.rs
index 4a9957ad..22c691a2 100644
--- a/src/rewritable_units/tokens/capturer/mod.rs
+++ b/src/rewritable_units/tokens/capturer/mod.rs
@@ -1,15 +1,8 @@
-mod text_decoder;
 mod to_token;
 
-use self::text_decoder::TextDecoder;
-use super::*;
-use crate::base::SharedEncoding;
-use crate::parser::Lexeme;
-use crate::rewriter::RewritingError;
-use bitflags::bitflags;
-
 pub(crate) use self::to_token::{ToToken, ToTokenResult};
 
+use bitflags::bitflags;
 bitflags! {
     #[derive(Debug, Clone, Copy, PartialEq, Eq)]
     pub struct TokenCaptureFlags: u8 {
@@ -20,77 +13,3 @@ bitflags! {
         const DOCTYPES = 0b0001_0000;
     }
 }
-
-#[derive(Debug)]
-pub(crate) enum TokenCapturerEvent<'i> {
-    LexemeConsumed,
-    TokenProduced(Box<Token<'i>>),
-}
-
-type CapturerEventHandler<'h> =
-    &'h mut dyn FnMut(TokenCapturerEvent<'_>) -> Result<(), RewritingError>;
-
-pub(crate) struct TokenCapturer {
-    encoding: SharedEncoding,
-    text_decoder: TextDecoder,
-    capture_flags: TokenCaptureFlags,
-}
-
-impl TokenCapturer {
-    #[inline]
-    #[must_use]
-    pub fn new(capture_flags: TokenCaptureFlags, encoding: SharedEncoding) -> Self {
-        Self {
-            encoding: SharedEncoding::clone(&encoding),
-            text_decoder: TextDecoder::new(encoding),
-            capture_flags,
-        }
-    }
-
-    #[inline]
-    #[must_use]
-    pub const fn has_captures(&self) -> bool {
-        !self.capture_flags.is_empty()
-    }
-
-    #[inline]
-    pub fn set_capture_flags(&mut self, flags: TokenCaptureFlags) {
-        self.capture_flags = flags;
-    }
-
-    #[inline]
-    pub fn flush_pending_text(
-        &mut self,
-        event_handler: CapturerEventHandler<'_>,
-    ) -> Result<(), RewritingError> {
-        self.text_decoder.flush_pending(event_handler)
-    }
-
-    pub fn feed<'i, T>(
-        &mut self,
-        lexeme: &Lexeme<'i, T>,
-        mut event_handler: impl FnMut(TokenCapturerEvent<'_>) -> Result<(), RewritingError>,
-    ) -> Result<(), RewritingError>
-    where
-        Lexeme<'i, T>: ToToken,
-    {
-        match lexeme.to_token(&mut self.capture_flags, self.encoding.get()) {
-            ToTokenResult::Token(token) => {
-                self.flush_pending_text(&mut event_handler)?;
-                event_handler(TokenCapturerEvent::LexemeConsumed)?;
-                event_handler(TokenCapturerEvent::TokenProduced(token))
-            }
-            ToTokenResult::Text(text_type) => {
-                if self.capture_flags.contains(TokenCaptureFlags::TEXT) {
-                    event_handler(TokenCapturerEvent::LexemeConsumed)?;
-
-                    self.text_decoder
-                        .feed_text(&lexeme.raw(), text_type, &mut event_handler)?;
-                }
-
-                Ok(())
-            }
-            ToTokenResult::None => self.flush_pending_text(&mut event_handler),
-        }
-    }
-}
diff --git a/src/rewritable_units/tokens/capturer/text_decoder.rs b/src/rewritable_units/tokens/capturer/text_decoder.rs
deleted file mode 100644
index 1a81e3e3..00000000
--- a/src/rewritable_units/tokens/capturer/text_decoder.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-use super::*;
-use crate::base::SharedEncoding;
-use crate::html::TextType;
-use crate::rewriter::RewritingError;
-use encoding_rs::{CoderResult, Decoder};
-
-// NOTE: this can't be refactored into method, because we hold a mutable reference for `self`
-// during the decoding loop in `feed_text`.
-macro_rules! emit {
-    ($self:tt, $text:expr, $last:ident, $event_handler:ident) => {{
-        let token = TextChunk::new_token($text, $self.last_text_type, $last, $self.encoding.get());
-
-        $event_handler(TokenCapturerEvent::TokenProduced(Box::new(token)))
-    }};
-}
-
-pub(crate) struct TextDecoder {
-    encoding: SharedEncoding,
-    pending_text_streaming_decoder: Option<Decoder>,
-    text_buffer: String,
-    last_text_type: TextType,
-}
-
-impl TextDecoder {
-    #[inline]
-    #[must_use]
-    pub fn new(encoding: SharedEncoding) -> Self {
-        Self {
-            encoding,
-            pending_text_streaming_decoder: None,
-            // TODO make adjustable
-            text_buffer: String::from_utf8(vec![0u8; 1024]).unwrap(),
-            last_text_type: TextType::Data,
-        }
-    }
-
-    #[inline]
-    pub fn flush_pending(
-        &mut self,
-        event_handler: CapturerEventHandler<'_>,
-    ) -> Result<(), RewritingError> {
-        if self.pending_text_streaming_decoder.is_some() {
-            self.decode_with_streaming_decoder(&[], true, event_handler)?;
-            self.pending_text_streaming_decoder = None;
-        }
-        Ok(())
-    }
-
-    fn decode_with_streaming_decoder(
-        &mut self,
-        raw: &[u8],
-        last: bool,
-        event_handler: CapturerEventHandler<'_>,
-    ) -> Result<(), RewritingError> {
-        let encoding = self.encoding.get();
-        let buffer = self.text_buffer.as_mut_str();
-
-        let decoder = self
-            .pending_text_streaming_decoder
-            .get_or_insert_with(|| encoding.new_decoder_without_bom_handling());
-
-        let mut consumed = 0;
-
-        loop {
-            let (status, read, written, ..) = decoder.decode_to_str(&raw[consumed..], buffer, last);
-
-            if written > 0 || last {
-                emit!(self, &buffer[..written], last, event_handler)?;
-            }
-
-            if status == CoderResult::InputEmpty {
-                break;
-            }
-
-            consumed += read;
-        }
-
-        Ok(())
-    }
-
-    #[inline]
-    pub fn feed_text(
-        &mut self,
-        raw: &[u8],
-        text_type: TextType,
-        event_handler: CapturerEventHandler<'_>,
-    ) -> Result<(), RewritingError> {
-        self.last_text_type = text_type;
-        self.decode_with_streaming_decoder(raw, false, event_handler)
-    }
-}
diff --git a/src/rewritable_units/tokens/capturer/to_token.rs b/src/rewritable_units/tokens/capturer/to_token.rs
index 2d042847..b79e6827 100644
--- a/src/rewritable_units/tokens/capturer/to_token.rs
+++ b/src/rewritable_units/tokens/capturer/to_token.rs
@@ -1,21 +1,15 @@
-use super::*;
+use super::TokenCaptureFlags;
 use crate::html::TextType;
 use crate::parser::{NonTagContentLexeme, NonTagContentTokenOutline, TagLexeme, TagTokenOutline};
+use crate::rewritable_units::{Attributes, Comment, Doctype, EndTag, StartTag, Token};
 use encoding_rs::Encoding;
 
 pub(crate) enum ToTokenResult<'i> {
-    Token(Box<Token<'i>>),
+    Token(Token<'i>),
     Text(TextType),
     None,
 }
 
-impl<'i> From<Token<'i>> for ToTokenResult<'i> {
-    #[inline]
-    fn from(token: Token<'i>) -> Self {
-        ToTokenResult::Token(Box::new(token))
-    }
-}
-
 pub(crate) trait ToToken {
     fn to_token(
         &self,
@@ -25,6 +19,7 @@ pub(crate) trait ToToken {
 }
 
 impl ToToken for TagLexeme<'_> {
+    #[inline]
     fn to_token(
         &self,
         capture_flags: &mut TokenCaptureFlags,
@@ -40,16 +35,14 @@ impl ToToken for TagLexeme<'_> {
             } if capture_flags.contains(TokenCaptureFlags::NEXT_START_TAG) => {
                 // NOTE: clear the flag once we've seen required start tag.
                 capture_flags.remove(TokenCaptureFlags::NEXT_START_TAG);
-
-                StartTag::new_token(
+                ToTokenResult::Token(StartTag::new_token(
                     self.part(name),
                     Attributes::new(self.input(), attributes, encoding),
                     ns,
                     self_closing,
                     self.raw(),
                     encoding,
-                )
-                .into()
+                ))
             }
 
             TagTokenOutline::EndTag { name, .. }
@@ -57,8 +50,7 @@ impl ToToken for TagLexeme<'_> {
             {
                 // NOTE: clear the flag once we've seen required end tag.
                 capture_flags.remove(TokenCaptureFlags::NEXT_END_TAG);
-
-                EndTag::new_token(self.part(name), self.raw(), encoding).into()
+                ToTokenResult::Token(EndTag::new_token(self.part(name), self.raw(), encoding))
             }
             _ => ToTokenResult::None,
         }
@@ -66,17 +58,23 @@ impl ToToken for TagLexeme<'_> {
 }
 
 impl ToToken for NonTagContentLexeme<'_> {
+    #[inline]
     fn to_token(
         &self,
         capture_flags: &mut TokenCaptureFlags,
         encoding: &'static Encoding,
     ) -> ToTokenResult<'_> {
         match *self.token_outline() {
-            Some(NonTagContentTokenOutline::Text(text_type)) => ToTokenResult::Text(text_type),
+            Some(NonTagContentTokenOutline::Text(text_type))
+                if capture_flags.contains(TokenCaptureFlags::TEXT) =>
+            {
+                ToTokenResult::Text(text_type)
+            }
+
             Some(NonTagContentTokenOutline::Comment(text))
                 if capture_flags.contains(TokenCaptureFlags::COMMENTS) =>
             {
-                Comment::new_token(self.part(text), self.raw(), encoding).into()
+                ToTokenResult::Token(Comment::new_token(self.part(text), self.raw(), encoding))
             }
 
             Some(NonTagContentTokenOutline::Doctype {
@@ -84,16 +82,17 @@ impl ToToken for NonTagContentLexeme<'_> {
                 public_id,
                 system_id,
                 force_quirks,
-            }) if capture_flags.contains(TokenCaptureFlags::DOCTYPES) => Doctype::new_token(
-                self.opt_part(name),
-                self.opt_part(public_id),
-                self.opt_part(system_id),
-                force_quirks,
-                false, // removed
-                self.raw(),
-                encoding,
-            )
-            .into(),
+            }) if capture_flags.contains(TokenCaptureFlags::DOCTYPES) => {
+                ToTokenResult::Token(Doctype::new_token(
+                    self.opt_part(name),
+                    self.opt_part(public_id),
+                    self.opt_part(system_id),
+                    force_quirks,
+                    false, // removed
+                    self.raw(),
+                    encoding,
+                ))
+            }
             _ => ToTokenResult::None,
         }
     }
diff --git a/src/rewritable_units/tokens/mod.rs b/src/rewritable_units/tokens/mod.rs
index 37f5372c..4aa6ed33 100644
--- a/src/rewritable_units/tokens/mod.rs
+++ b/src/rewritable_units/tokens/mod.rs
@@ -21,7 +21,7 @@ macro_rules! impl_serialize {
                 mut self,
                 output_handler: &mut dyn FnMut(&[u8]),
             ) -> Result<(), crate::errors::RewritingError> {
-                let mut encoder = crate::rewritable_units::text_encoder::StreamingHandlerSink::new(
+                let mut encoder = crate::rewritable_units::StreamingHandlerSink::new(
                     self.encoding,
                     output_handler,
                 );
diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs
index d160f3f9..c39c5251 100644
--- a/src/rewritable_units/tokens/text_chunk.rs
+++ b/src/rewritable_units/tokens/text_chunk.rs
@@ -1,4 +1,4 @@
-use super::{Mutations, Token};
+use super::Mutations;
 use crate::base::Bytes;
 use crate::errors::RewritingError;
 use crate::html::TextType;
@@ -72,20 +72,20 @@ pub struct TextChunk<'i> {
 impl<'i> TextChunk<'i> {
     #[inline]
     #[must_use]
-    pub(super) fn new_token(
+    pub(crate) fn new(
         text: &'i str,
         text_type: TextType,
         last_in_text_node: bool,
         encoding: &'static Encoding,
-    ) -> Token<'i> {
-        Token::TextChunk(TextChunk {
+    ) -> Self {
+        TextChunk {
             text: text.into(),
             text_type,
             last_in_text_node,
             encoding,
             mutations: Mutations::new(),
             user_data: Box::new(()),
-        })
+        }
     }
 
     /// Returns the textual content of the chunk.
@@ -381,14 +381,8 @@ mod tests {
 
     #[test]
     fn in_place_text_modifications() {
-        use super::super::Token;
-
         let encoding = Encoding::for_label_no_replacement(b"utf-8").unwrap();
-        let Token::TextChunk(mut chunk) =
-            TextChunk::new_token("original text", TextType::PlainText, true, encoding)
-        else {
-            unreachable!()
-        };
+        let mut chunk = TextChunk::new("original text", TextType::PlainText, true, encoding);
 
         assert_eq!(chunk.as_str(), "original text");
         chunk.set_str("hello".to_owned());
@@ -416,6 +410,7 @@ mod tests {
         macro_rules! skip_eof_chunk {
             ($c:ident) => {
                 if $c.last_in_text_node() {
+                    // NOTE: the assertion below is not true in general — a replacement char for an incomplete UTF-8 sequence could be flushed in the last chunk
                     assert!($c.as_str().is_empty());
                     return;
                 }
@@ -507,5 +502,15 @@ mod tests {
                 "<before><foo & bar><after>"
             );
         }
+
+        #[test]
+        fn last_flush_text_decoder() {
+            let rewritten = rewrite_text_chunk(b"<p>\xF0\xF0\x9F\xF0\x9F\x98</p>", UTF_8, |c| {
+                if c.last_in_text_node() {
+                    c.after(" last", ContentType::Text);
+                }
+            });
+            assert_eq!("<p>\u{fffd}\u{fffd}\u{fffd} last</p>", rewritten);
+        }
     }
 }
diff --git a/src/transform_stream/dispatcher.rs b/src/transform_stream/dispatcher.rs
index 98c882b0..8b4a6741 100644
--- a/src/transform_stream/dispatcher.rs
+++ b/src/transform_stream/dispatcher.rs
@@ -1,15 +1,15 @@
 use crate::base::{Bytes, Range, SharedEncoding};
 use crate::html::{LocalName, Namespace};
+use crate::html_content::{TextChunk, TextType};
 use crate::parser::{
     AttributeBuffer, Lexeme, LexemeSink, NonTagContentLexeme, ParserDirective, ParserOutputSink,
     TagHintSink, TagLexeme, TagTokenOutline,
 };
-use crate::rewritable_units::{
-    DocumentEnd, Serialize, ToToken, Token, TokenCaptureFlags, TokenCapturer, TokenCapturerEvent,
-};
+use crate::rewritable_units::TextDecoder;
+use crate::rewritable_units::ToTokenResult;
+use crate::rewritable_units::{DocumentEnd, Serialize, ToToken, Token, TokenCaptureFlags};
 use crate::rewriter::RewritingError;
-
-use TagTokenOutline::{EndTag, StartTag};
+use encoding_rs::Encoding;
 
 pub(crate) struct AuxStartTagInfo<'i> {
     pub input: &'i Bytes<'i>,
@@ -67,45 +67,30 @@ impl<F: FnMut(&[u8])> OutputSink for F {
 }
 
 // Pub only for integration tests
-pub struct Dispatcher<C, O>
-where
-    C: TransformController,
-    O: OutputSink,
-{
+pub struct Dispatcher<C, O> {
+    delegate: DispatcherDelegate<C, O>,
+    text_decoder: TextDecoder,
+    last_text_type: TextType,
+    got_flags_from_hint: bool,
+    pending_element_aux_info_req: Option<AuxStartTagInfoRequest<C>>,
+    encoding: SharedEncoding,
+}
+
+/// Fields split out of `Dispatcher` so closures passed to `text_decoder` can borrow them independently
+struct DispatcherDelegate<C, O> {
     transform_controller: C,
     output_sink: O,
     remaining_content_start: usize,
-    token_capturer: TokenCapturer,
-    got_flags_from_hint: bool,
-    pending_element_aux_info_req: Option<AuxStartTagInfoRequest<C>>,
+    capture_flags: TokenCaptureFlags,
     emission_enabled: bool,
-    encoding: SharedEncoding,
 }
 
-impl<C, O> Dispatcher<C, O>
+impl<C, O> DispatcherDelegate<C, O>
 where
     C: TransformController,
     O: OutputSink,
 {
-    pub fn new(transform_controller: C, output_sink: O, encoding: SharedEncoding) -> Self {
-        let initial_capture_flags = transform_controller.initial_capture_flags();
-
-        Self {
-            transform_controller,
-            output_sink,
-            remaining_content_start: 0,
-            token_capturer: TokenCapturer::new(
-                initial_capture_flags,
-                SharedEncoding::clone(&encoding),
-            ),
-            got_flags_from_hint: false,
-            pending_element_aux_info_req: None,
-            emission_enabled: true,
-            encoding,
-        }
-    }
-
-    pub fn flush_remaining_input(&mut self, input: &[u8], consumed_byte_count: usize) {
+    fn flush_remaining_input(&mut self, input: &[u8], consumed_byte_count: usize) {
         let output = &input[self.remaining_content_start..consumed_byte_count];
 
         if self.emission_enabled && !output.is_empty() {
@@ -115,10 +100,10 @@ where
         self.remaining_content_start = 0;
     }
 
-    pub fn finish(&mut self, input: &[u8]) -> Result<(), RewritingError> {
+    fn finish(&mut self, encoding: &'static Encoding, input: &[u8]) -> Result<(), RewritingError> {
         self.flush_remaining_input(input, input.len());
 
-        let mut document_end = DocumentEnd::new(&mut self.output_sink, self.encoding.get());
+        let mut document_end = DocumentEnd::new(&mut self.output_sink, encoding);
 
         self.transform_controller.handle_end(&mut document_end)?;
 
@@ -128,6 +113,88 @@ where
         Ok(())
     }
 
+    /// Emits the unflushed input preceding `lexeme` (if emission is enabled) and returns the lexeme's end offset
+    #[inline(never)]
+    fn lexeme_consumed<T>(&mut self, lexeme: &Lexeme<'_, T>) -> usize {
+        let lexeme_range = lexeme.raw_range();
+
+        let chunk_range = Range {
+            start: self.remaining_content_start,
+            end: lexeme_range.start,
+        };
+
+        let chunk = lexeme.input().slice(chunk_range);
+
+        if self.emission_enabled && chunk.len() > 0 {
+            self.output_sink.handle_chunk(&chunk);
+        }
+
+        lexeme_range.end
+    }
+
+    #[inline]
+    fn token_produced(&mut self, mut token: Token<'_>) -> Result<(), RewritingError> {
+        trace!(@output token);
+
+        self.transform_controller.handle_token(&mut token)?;
+
+        if self.emission_enabled {
+            token.into_bytes(&mut |c| self.output_sink.handle_chunk(c))?;
+        }
+        Ok(())
+    }
+
+    fn text_token_produced(
+        &mut self,
+        text: &str,
+        encoding: &'static Encoding,
+        text_type: TextType,
+        is_last_in_node: bool,
+    ) -> Result<(), RewritingError> {
+        let mut token =
+            Token::TextChunk(TextChunk::new(text, text_type, is_last_in_node, encoding));
+
+        trace!(@output token);
+
+        self.transform_controller.handle_token(&mut token)?;
+
+        if self.emission_enabled {
+            token.into_bytes(&mut |c| self.output_sink.handle_chunk(c))?;
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn should_stop_removing_element_content(&self) -> bool {
+        !self.emission_enabled && self.transform_controller.should_emit_content()
+    }
+}
+
+impl<C, O> Dispatcher<C, O>
+where
+    C: TransformController,
+    O: OutputSink,
+{
+    pub fn new(transform_controller: C, output_sink: O, encoding: SharedEncoding) -> Self {
+        let capture_flags = transform_controller.initial_capture_flags();
+
+        Self {
+            delegate: DispatcherDelegate {
+                transform_controller,
+                output_sink,
+                capture_flags,
+                remaining_content_start: 0,
+                emission_enabled: true,
+            },
+            text_decoder: TextDecoder::new(SharedEncoding::clone(&encoding)),
+            last_text_type: TextType::Data,
+            encoding,
+            got_flags_from_hint: false,
+            pending_element_aux_info_req: None,
+        }
+    }
+
+    #[inline(never)]
     fn try_produce_token_from_lexeme<'i, T>(
         &mut self,
         lexeme: &Lexeme<'i, T>,
@@ -135,50 +202,61 @@ where
     where
         Lexeme<'i, T>: ToToken,
     {
-        let transform_controller = &mut self.transform_controller;
-        let output_sink = &mut self.output_sink;
-        let emission_enabled = self.emission_enabled;
-        let lexeme_range = lexeme.raw_range();
-        let remaining_content_start = self.remaining_content_start;
-        let mut lexeme_consumed = false;
-
-        self.token_capturer.feed(lexeme, |event| {
-            match event {
-                TokenCapturerEvent::LexemeConsumed => {
-                    let chunk = lexeme.input().slice(Range {
-                        start: remaining_content_start,
-                        end: lexeme_range.start,
+        let lexeme_consumed_end;
+
+        match lexeme.to_token(&mut self.delegate.capture_flags, self.encoding.get()) {
+            ToTokenResult::Token(token) => {
+                self.text_decoder
+                    .flush_pending(&mut |text, is_last, encoding| {
+                        self.delegate.text_token_produced(
+                            text,
+                            encoding,
+                            self.last_text_type,
+                            is_last,
+                        )
+                    })?;
+                lexeme_consumed_end = self.delegate.lexeme_consumed(lexeme);
+                self.delegate.token_produced(token)?;
+            }
+            ToTokenResult::Text(text_type) => {
+                lexeme_consumed_end = self.delegate.lexeme_consumed(lexeme);
+
+                self.last_text_type = text_type;
+                self.text_decoder.feed_text(
+                    &lexeme.raw(),
+                    false,
+                    &mut |text, is_last, encoding| {
+                        self.delegate.text_token_produced(
+                            text,
+                            encoding,
+                            self.last_text_type,
+                            is_last,
+                        )
+                    },
+                )?;
+            }
+            ToTokenResult::None => {
+                return self
+                    .text_decoder
+                    .flush_pending(&mut |text, is_last, encoding| {
+                        self.delegate.text_token_produced(
+                            text,
+                            encoding,
+                            self.last_text_type,
+                            is_last,
+                        )
                     });
-
-                    lexeme_consumed = true;
-
-                    if emission_enabled && chunk.len() > 0 {
-                        output_sink.handle_chunk(&chunk);
-                    }
-                }
-                TokenCapturerEvent::TokenProduced(mut token) => {
-                    trace!(@output token);
-
-                    transform_controller.handle_token(&mut token)?;
-
-                    if emission_enabled {
-                        token.into_bytes(&mut |c| output_sink.handle_chunk(c))?;
-                    }
-                }
             }
-            Ok(())
-        })?;
+        };
 
-        if lexeme_consumed {
-            self.remaining_content_start = lexeme_range.end;
-        }
+        self.delegate.remaining_content_start = lexeme_consumed_end;
 
         Ok(())
     }
 
     #[inline]
     const fn get_next_parser_directive(&self) -> ParserDirective {
-        if self.token_capturer.has_captures() {
+        if !self.delegate.capture_flags.is_empty() {
             ParserDirective::Lex
         } else {
             ParserDirective::WherePossibleScanForTagsOnly
@@ -194,7 +272,7 @@ where
         macro_rules! get_flags_from_aux_info_res {
             ($handler:expr, $attributes:expr, $self_closing:expr) => {
                 $handler(
-                    &mut self.transform_controller,
+                    &mut self.delegate.transform_controller,
                     AuxStartTagInfo {
                         input,
                         attr_buffer: $attributes,
@@ -208,7 +286,7 @@ where
             // NOTE: tag hint was produced for the tag, but
             // attributes and self closing flag were requested.
             Some(aux_info_req) => match *lexeme.token_outline() {
-                StartTag {
+                TagTokenOutline::StartTag {
                     ref attributes,
                     self_closing,
                     ..
@@ -221,7 +299,7 @@ where
             // NOTE: tag hint hasn't been produced for the tag, because
             // parser is not in the tag scan mode.
             None => match *lexeme.token_outline() {
-                StartTag {
+                TagTokenOutline::StartTag {
                     name,
                     name_hash,
                     ns,
@@ -230,7 +308,11 @@ where
                 } => {
                     let name = LocalName::new(input, name, name_hash);
 
-                    match self.transform_controller.handle_start_tag(name, ns) {
+                    match self
+                        .delegate
+                        .transform_controller
+                        .handle_start_tag(name, ns)
+                    {
                         Ok(flags) => Ok(flags),
                         Err(DispatcherError::InfoRequest(aux_info_req)) => {
                             get_flags_from_aux_info_res!(aux_info_req, &attributes, self_closing)
@@ -239,16 +321,16 @@ where
                     }
                 }
 
-                EndTag { name, name_hash } => {
+                TagTokenOutline::EndTag { name, name_hash } => {
                     let name = LocalName::new(input, name, name_hash);
-                    Ok(self.transform_controller.handle_end_tag(name))
+                    Ok(self.delegate.transform_controller.handle_end_tag(name))
                 }
             },
         };
 
         match capture_flags {
             Ok(flags) => {
-                self.token_capturer.set_capture_flags(flags);
+                self.delegate.capture_flags = flags;
                 Ok(())
             }
             Err(e) => Err(e),
@@ -260,37 +342,27 @@ where
         &mut self,
         flags: TokenCaptureFlags,
     ) -> ParserDirective {
-        self.token_capturer.set_capture_flags(flags);
+        self.delegate.capture_flags = flags;
         self.got_flags_from_hint = true;
         self.get_next_parser_directive()
     }
 
     #[inline]
     fn flush_pending_captured_text(&mut self) -> Result<(), RewritingError> {
-        let transform_controller = &mut self.transform_controller;
-        let output_sink = &mut self.output_sink;
-        let emission_enabled = self.emission_enabled;
-
-        self.token_capturer.flush_pending_text(&mut |event| {
-            if let TokenCapturerEvent::TokenProduced(mut token) = event {
-                trace!(@output token);
-
-                transform_controller.handle_token(&mut token)?;
-
-                if emission_enabled {
-                    token.into_bytes(&mut |c| output_sink.handle_chunk(c))?;
-                }
-            }
-
-            Ok(())
-        })?;
+        self.text_decoder
+            .flush_pending(&mut |text, is_last, encoding| {
+                self.delegate
+                    .text_token_produced(text, encoding, self.last_text_type, is_last)
+            })
+    }
 
-        Ok(())
+    pub fn flush_remaining_input(&mut self, input: &[u8], consumed_byte_count: usize) {
+        self.delegate
+            .flush_remaining_input(input, consumed_byte_count);
     }
 
-    #[inline]
-    fn should_stop_removing_element_content(&self) -> bool {
-        !self.emission_enabled && self.transform_controller.should_emit_content()
+    pub fn finish(&mut self, input: &[u8]) -> Result<(), RewritingError> {
+        self.delegate.finish(self.encoding.get(), input)
     }
 }
 
@@ -313,15 +385,15 @@ where
             self.adjust_capture_flags_for_tag_lexeme(lexeme)?;
         }
 
-        if let EndTag { .. } = lexeme.token_outline() {
-            if self.should_stop_removing_element_content() {
-                self.emission_enabled = true;
-                self.remaining_content_start = lexeme.raw_range().start;
+        if let TagTokenOutline::EndTag { .. } = lexeme.token_outline() {
+            if self.delegate.should_stop_removing_element_content() {
+                self.delegate.emission_enabled = true;
+                self.delegate.remaining_content_start = lexeme.raw_range().start;
             }
         }
 
         self.try_produce_token_from_lexeme(lexeme)?;
-        self.emission_enabled = self.transform_controller.should_emit_content();
+        self.delegate.emission_enabled = self.delegate.transform_controller.should_emit_content();
 
         Ok(self.get_next_parser_directive())
     }
@@ -345,7 +417,11 @@ where
         name: LocalName<'_>,
         ns: Namespace,
     ) -> Result<ParserDirective, RewritingError> {
-        match self.transform_controller.handle_start_tag(name, ns) {
+        match self
+            .delegate
+            .transform_controller
+            .handle_start_tag(name, ns)
+        {
             Ok(flags) => {
                 Ok(self.apply_capture_flags_from_hint_and_get_next_parser_directive(flags))
             }
@@ -365,13 +441,13 @@ where
     ) -> Result<ParserDirective, RewritingError> {
         self.flush_pending_captured_text()?;
 
-        let mut flags = self.transform_controller.handle_end_tag(name);
+        let mut flags = self.delegate.transform_controller.handle_end_tag(name);
 
         // NOTE: if emission was disabled (i.e. we've been removing element content)
         // we need to request the end tag lexeme, to ensure that we have it.
         // Otherwise, if we have unfinished end tag in the end of input we'll emit
         // it where we shouldn't.
-        if self.should_stop_removing_element_content() {
+        if self.delegate.should_stop_removing_element_content() {
             flags |= TokenCaptureFlags::NEXT_END_TAG;
         }