python · srittau · Nov 14, 2025 · Nov 8, 2025 · Nov 8, 2025 · Nov 8, 2025
diff --git a/stubs/bleach/bleach/html5lib_shim.pyi b/stubs/bleach/bleach/html5lib_shim.pyi
@@ -50,7 +50,7 @@ class InputStreamWithMemory:
 
 class BleachHTMLTokenizer(HTMLTokenizer):
     consume_entities: bool
-    stream: InputStreamWithMemory
+    stream: InputStreamWithMemory  # type: ignore[assignment]
     emitted_last_token: dict[str, Any] | None
     def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ...
 

diff --git a/stubs/bleach/bleach/linkifier.pyi b/stubs/bleach/bleach/linkifier.pyi
@@ -36,7 +36,7 @@ class Linker:
 # or `html5lib` token might be reused
 _Token: TypeAlias = dict[str, Any]
 
-class LinkifyFilter(Filter):
+class LinkifyFilter(Filter[_Token]):
     callbacks: Iterable[_Callback]
     skip_tags: Container[str]
     parse_email: bool

diff --git a/stubs/bleach/bleach/sanitizer.pyi b/stubs/bleach/bleach/sanitizer.pyi
@@ -1,3 +1,4 @@
+from _typeshed import Incomplete
 from collections.abc import Callable, Container, Iterable, Iterator
 from re import Pattern
 from typing import Final, Protocol, type_check_only
@@ -24,7 +25,7 @@ class NoCssSanitizerWarning(UserWarning): ...
 
 @type_check_only
 class _FilterConstructor(Protocol):
-    def __call__(self, *, source: BleachSanitizerFilter) -> Filter: ...
+    def __call__(self, *, source: BleachSanitizerFilter) -> Filter[Incomplete]: ...
 
 # _FilterConstructor used to be called _Filter
 # this alias is obsolete and can potentially be removed in the future
@@ -40,7 +41,7 @@ class Cleaner:
     protocols: Iterable[str]
     strip: bool
     strip_comments: bool
-    filters: Iterable[Filter]
+    filters: Iterable[_FilterConstructor]
     css_sanitizer: CSSSanitizer | None
     parser: BleachHTMLParser
     walker: TreeWalker
@@ -85,7 +86,7 @@ class BleachSanitizerFilter(SanitizerFilter):
     def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
     def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
     def __iter__(self) -> Iterator[_Token]: ...
-    def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...
+    def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...  # type: ignore[override]
     def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ...
     def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
     def allow_token(self, token: _Token) -> _Token: ...

diff --git a/stubs/html5lib/METADATA.toml b/stubs/html5lib/METADATA.toml
@@ -1,5 +1,6 @@
 version = "1.1.*"
 upstream_repository = "https://github.com/html5lib/html5lib-python"
+requires = ["types-webencodings"]
 
 [tool.stubtest]
 extras = ["all"]
diff --git a/stubs/html5lib/html5lib/_ihatexml.pyi b/stubs/html5lib/html5lib/_ihatexml.pyi
@@ -1,6 +1,4 @@
 import re
-from _typeshed import Incomplete
-from collections.abc import Iterable
 
 baseChar: str
 ideographic: str
@@ -13,13 +11,13 @@ nameFirst: str
 reChar: re.Pattern[str]
 reCharRange: re.Pattern[str]
 
-def charStringToList(chars: str) -> list[str]: ...
-def normaliseCharList(charList: Iterable[str]) -> list[str]: ...
+def charStringToList(chars: str) -> list[list[int]]: ...
+def normaliseCharList(charList: list[list[int]]) -> list[list[int]]: ...
 
 max_unicode: int
 
-def missingRanges(charList: Iterable[str]) -> list[str]: ...
-def listToRegexpStr(charList): ...
+def missingRanges(charList: list[list[int]]) -> list[list[int]]: ...
+def listToRegexpStr(charList: list[list[int]]) -> str: ...
 def hexToInt(hex_str: str | bytes | bytearray) -> int: ...
 def escapeRegexp(string: str) -> str: ...
 
@@ -29,13 +27,13 @@ nonPubidCharRegexp: re.Pattern[str]
 
 class InfosetFilter:
     replacementRegexp: re.Pattern[str]
-    dropXmlnsLocalName: Incomplete
-    dropXmlnsAttrNs: Incomplete
-    preventDoubleDashComments: Incomplete
-    preventDashAtCommentEnd: Incomplete
-    replaceFormFeedCharacters: Incomplete
-    preventSingleQuotePubid: Incomplete
-    replaceCache: Incomplete
+    dropXmlnsLocalName: bool
+    dropXmlnsAttrNs: bool
+    preventDoubleDashComments: bool
+    preventDashAtCommentEnd: bool
+    replaceFormFeedCharacters: bool
+    preventSingleQuotePubid: bool
+    replaceCache: dict[str, str]
     def __init__(
         self,
         dropXmlnsLocalName: bool = False,
@@ -45,13 +43,13 @@ class InfosetFilter:
         replaceFormFeedCharacters: bool = True,
         preventSingleQuotePubid: bool = False,
     ) -> None: ...
-    def coerceAttribute(self, name, namespace=None): ...
-    def coerceElement(self, name): ...
-    def coerceComment(self, data): ...
-    def coerceCharacters(self, data): ...
-    def coercePubid(self, data): ...
-    def toXmlName(self, name): ...
-    def getReplacementCharacter(self, char): ...
-    def fromXmlName(self, name): ...
-    def escapeChar(self, char): ...
-    def unescapeChar(self, charcode): ...
+    def coerceAttribute(self, name: str, namespace: str | None = None) -> str | None: ...
+    def coerceElement(self, name: str) -> str: ...
+    def coerceComment(self, data: str) -> str: ...
+    def coerceCharacters(self, data: str) -> str: ...
+    def coercePubid(self, data: str) -> str: ...
+    def toXmlName(self, name: str) -> str: ...
+    def getReplacementCharacter(self, char: str) -> str: ...
+    def fromXmlName(self, name: str) -> str: ...
+    def escapeChar(self, char: str) -> str: ...
+    def unescapeChar(self, charcode: str | bytes | bytearray) -> str: ...
diff --git a/stubs/html5lib/html5lib/_inputstream.pyi b/stubs/html5lib/html5lib/_inputstream.pyi
@@ -1,37 +1,36 @@
-from _typeshed import Incomplete, SupportsRead
-from codecs import CodecInfo
-from typing import Protocol, overload, type_check_only
-from typing_extensions import TypeAlias
+import re
+from _io import BytesIO, StringIO
+from _typeshed import Incomplete, ReadableBuffer, SupportsRead
+from collections.abc import Callable, Iterable
+from typing import Any, AnyStr, Generic, Literal, TypeVar, overload
+from typing_extensions import Self, TypeAlias
 
-# Is actually webencodings.Encoding
-@type_check_only
-class _Encoding(Protocol):
-    name: str
-    codec_info: CodecInfo
-    def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
+from webencodings import Encoding
 
 _UnicodeInputStream: TypeAlias = str | SupportsRead[str]
 _BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
 _InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream  # noqa: Y047  # used in other files
+_SupportsReadT = TypeVar("_SupportsReadT", bound=SupportsRead[Any])
+_SupportsReadBytesT = TypeVar("_SupportsReadBytesT", bound=SupportsRead[bytes])
 
-spaceCharactersBytes: Incomplete
-asciiLettersBytes: Incomplete
-asciiUppercaseBytes: Incomplete
-spacesAngleBrackets: Incomplete
+spaceCharactersBytes: frozenset[bytes]
+asciiLettersBytes: frozenset[bytes]
+asciiUppercaseBytes: frozenset[bytes]
+spacesAngleBrackets: frozenset[bytes]
 invalid_unicode_no_surrogate: str
-invalid_unicode_re: Incomplete
-non_bmp_invalid_codepoints: Incomplete
-ascii_punctuation_re: Incomplete
-charsUntilRegEx: Incomplete
+invalid_unicode_re: re.Pattern[str]
+non_bmp_invalid_codepoints: set[int]
+ascii_punctuation_re: re.Pattern[str]
+charsUntilRegEx: dict[tuple[Iterable[str | bytes | bytearray], bool], re.Pattern[str]]
 
-class BufferedStream:
-    stream: Incomplete
-    buffer: Incomplete
-    position: Incomplete
-    def __init__(self, stream) -> None: ...
-    def tell(self): ...
-    def seek(self, pos) -> None: ...
-    def read(self, bytes): ...
+class BufferedStream(Generic[AnyStr]):
+    stream: SupportsRead[AnyStr]
+    buffer: list[AnyStr]
+    position: list[int]
+    def __init__(self, stream: SupportsRead[AnyStr]) -> None: ...
+    def tell(self) -> int: ...
+    def seek(self, pos: int) -> None: ...
+    def read(self, bytes: int) -> AnyStr: ...
 
 @overload
 def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
@@ -48,9 +47,9 @@ def HTMLInputStream(
 ) -> HTMLBinaryInputStream: ...
 
 class HTMLUnicodeInputStream:
-    reportCharacterErrors: Incomplete
-    newLines: Incomplete
-    charEncoding: tuple[_Encoding, str]
+    reportCharacterErrors: Callable[[str], None]
+    newLines: list[int]
+    charEncoding: tuple[Encoding, str]
     dataStream: Incomplete
     def __init__(self, source: _UnicodeInputStream) -> None: ...
     chunk: str
@@ -60,14 +59,17 @@ class HTMLUnicodeInputStream:
     prevNumLines: int
     prevNumCols: int
     def reset(self) -> None: ...
-    def openStream(self, source): ...
+    @overload
+    def openStream(self, source: _SupportsReadT) -> _SupportsReadT: ...
+    @overload
+    def openStream(self, source: str | None) -> StringIO: ...
     def position(self) -> tuple[int, int]: ...
-    def char(self): ...
-    def readChunk(self, chunkSize=None): ...
-    def characterErrorsUCS4(self, data) -> None: ...
-    def characterErrorsUCS2(self, data) -> None: ...
-    def charsUntil(self, characters, opposite: bool = False): ...
-    def unget(self, char) -> None: ...
+    def char(self) -> str | None: ...
+    def readChunk(self, chunkSize: int | None = None) -> bool: ...
+    def characterErrorsUCS4(self, data: str) -> None: ...
+    def characterErrorsUCS2(self, data: str) -> None: ...
+    def charsUntil(self, characters: Iterable[str | bytes | bytearray], opposite: bool = False) -> str: ...
+    def unget(self, char: str | None) -> None: ...
 
 class HTMLBinaryInputStream(HTMLUnicodeInputStream):
     rawStream: Incomplete
@@ -77,8 +79,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
     transport_encoding: Incomplete
     same_origin_parent_encoding: Incomplete
     likely_encoding: Incomplete
-    default_encoding: Incomplete
-    charEncoding: tuple[_Encoding, str]
+    default_encoding: str
+    charEncoding: tuple[Encoding, str]
     def __init__(
         self,
         source: _BinaryInputStream,
@@ -91,46 +93,52 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
     ) -> None: ...
     dataStream: Incomplete
     def reset(self) -> None: ...
-    def openStream(self, source): ...
+    @overload  # type: ignore[override]
+    def openStream(self, source: _SupportsReadBytesT) -> _SupportsReadBytesT: ...
+    @overload  # type: ignore[override]
+    def openStream(self, source: ReadableBuffer) -> BytesIO: ...
     def determineEncoding(self, chardet: bool = True): ...
     def changeEncoding(self, newEncoding: str | bytes | None) -> None: ...
-    def detectBOM(self): ...
-    def detectEncodingMeta(self): ...
+    def detectBOM(self) -> Encoding | None: ...
+    def detectEncodingMeta(self) -> Encoding | None: ...
 
 class EncodingBytes(bytes):
-    def __new__(self, value): ...
-    def __init__(self, value) -> None: ...
-    def __iter__(self): ...
-    def __next__(self): ...
-    def next(self): ...
-    def previous(self): ...
-    def setPosition(self, position) -> None: ...
-    def getPosition(self): ...
-    position: Incomplete
-    def getCurrentByte(self): ...
+    def __new__(self, value: bytes) -> Self: ...
+    def __init__(self, value: bytes) -> None: ...
+    def __iter__(self) -> Self: ...  # type: ignore[override]
+    def __next__(self) -> bytes: ...
+    def next(self) -> bytes: ...
+    def previous(self) -> bytes: ...
+    def setPosition(self, position: int) -> None: ...
+    def getPosition(self) -> int | None: ...
     @property
-    def currentByte(self): ...
-    def skip(self, chars=...): ...
-    def skipUntil(self, chars): ...
-    def matchBytes(self, bytes): ...
-    def jumpTo(self, bytes): ...
+    def position(self) -> int | None: ...
+    @position.setter
+    def position(self, position: int) -> None: ...
+    def getCurrentByte(self) -> bytes: ...
+    @property
+    def currentByte(self) -> bytes: ...
+    def skip(self, chars: bytes | bytearray | Iterable[bytes] = ...) -> bytes | None: ...
+    def skipUntil(self, chars: bytes | bytearray | Iterable[bytes]) -> bytes | None: ...
+    def matchBytes(self, bytes: bytes | bytearray) -> bool: ...
+    def jumpTo(self, bytes: bytes | bytearray) -> Literal[True]: ...
 
 class EncodingParser:
-    data: Incomplete
-    encoding: Incomplete
-    def __init__(self, data) -> None: ...
-    def getEncoding(self): ...
-    def handleComment(self): ...
-    def handleMeta(self): ...
-    def handlePossibleStartTag(self): ...
-    def handlePossibleEndTag(self): ...
-    def handlePossibleTag(self, endTag): ...
-    def handleOther(self): ...
-    def getAttribute(self): ...
+    data: EncodingBytes
+    encoding: Encoding | None
+    def __init__(self, data: bytes) -> None: ...
+    def getEncoding(self) -> Encoding | None: ...
+    def handleComment(self) -> bool: ...
+    def handleMeta(self) -> bool: ...
+    def handlePossibleStartTag(self) -> bool: ...
+    def handlePossibleEndTag(self) -> bool: ...
+    def handlePossibleTag(self, endTag: bool | None) -> bool: ...
+    def handleOther(self) -> bool: ...
+    def getAttribute(self) -> tuple[bytes, bytes] | None: ...
 
 class ContentAttrParser:
-    data: Incomplete
-    def __init__(self, data) -> None: ...
-    def parse(self): ...
+    data: EncodingBytes
+    def __init__(self, data: EncodingBytes) -> None: ...
+    def parse(self) -> bytes | None: ...
 
-def lookupEncoding(encoding: str | bytes | None) -> str | None: ...
+def lookupEncoding(encoding: str | bytes | None) -> Encoding | None: ...