Skip to content

Commit eea4803

Browse files
abelcheung authored and frenzymadness committed
Add independent type hints files
1 parent 951d88e commit eea4803

File tree

3 files changed

+111
-0
lines changed

3 files changed

+111
-0
lines changed

lxml_html_clean/__init__.pyi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from .clean import (
2+
clean_html as clean_html,
3+
clean as clean,
4+
Cleaner as Cleaner,
5+
autolink as autolink,
6+
autolink_html as autolink_html,
7+
word_break as word_break,
8+
word_break_html as word_break_html
9+
)

lxml_html_clean/clean.pyi

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from typing import Collection, Iterable, Literal, Pattern, TypeVar, overload
2+
3+
from lxml.etree import _ElementTree
4+
from lxml.html import HtmlElement
5+
6+
# Constrained type variables: a function annotated with one of them returns
# the same kind of object it was given.
#
# _DT covers markup passed as text, as bytes or as an element; it is used by
# autolink_html() and word_break_html().
_DT = TypeVar("_DT", str, bytes, HtmlElement)
# _ET_DT additionally allows a whole element tree; it is used by
# Cleaner.clean_html().
_ET_DT = TypeVar(
    "_ET_DT",
    str,
    bytes,
    HtmlElement,
    _ElementTree[HtmlElement],
)
9+
10+
11+
class Cleaner:
    # Type declarations for the HTML cleaner.
    #
    # All constructor options are keyword-only.  The two ``__init__``
    # overloads exist only to express how ``allow_tags`` and
    # ``remove_unknown_tags`` may be combined; every other option is the same
    # in both.

    @overload  # allow_tags present, remove_unknown_tags must be False
    def __init__(
        self,
        *,
        scripts: bool = True,
        javascript: bool = True,
        comments: bool = True,
        style: bool = False,
        inline_style: bool | None = None,
        links: bool = True,
        meta: bool = True,
        page_structure: bool = True,
        processing_instructions: bool = True,
        embedded: bool = True,
        frames: bool = True,
        forms: bool = True,
        annoying_tags: bool = True,
        remove_tags: Collection[str] = (),
        allow_tags: Collection[str] = (),
        kill_tags: Collection[str] = (),
        remove_unknown_tags: Literal[False] = False,
        safe_attrs_only: bool = True,
        safe_attrs: Collection[str] = ...,
        add_nofollow: bool = False,
        host_whitelist: Collection[str] = (),
        whitelist_tags: Collection[str] | None = {"iframe", "embed"},
    ) -> None: ...
    @overload  # ... otherwise, allow_tags must not be used
    def __init__(
        self,
        *,
        scripts: bool = True,
        javascript: bool = True,
        comments: bool = True,
        style: bool = False,
        inline_style: bool | None = None,
        links: bool = True,
        meta: bool = True,
        page_structure: bool = True,
        processing_instructions: bool = True,
        embedded: bool = True,
        frames: bool = True,
        forms: bool = True,
        annoying_tags: bool = True,
        remove_tags: Collection[str] = (),
        kill_tags: Collection[str] = (),
        remove_unknown_tags: bool = True,
        safe_attrs_only: bool = True,
        safe_attrs: Collection[str] = ...,
        add_nofollow: bool = False,
        host_whitelist: Collection[str] = (),
        # None is accepted regardless of allow_tags, so this overload must
        # allow it exactly like the allow_tags overload does.
        whitelist_tags: Collection[str] | None = {"iframe", "embed"},
    ) -> None: ...
    # Calling the cleaner takes an element or a whole tree and returns None
    # (presumably the document is cleaned in place -- confirm against the
    # implementation).
    def __call__(self, doc: HtmlElement | _ElementTree[HtmlElement]) -> None: ...
    # Boolean predicates taking the element (and, for embedded content, the
    # URL) under consideration.
    def allow_follow(self, anchor: HtmlElement) -> bool: ...
    def allow_element(self, el: HtmlElement) -> bool: ...
    def allow_embedded_url(self, el: HtmlElement, url: str) -> bool: ...
    def kill_conditional_comments(self, doc: HtmlElement | _ElementTree[HtmlElement]) -> None: ...
    # The result has the same type as the argument: str, bytes, an element
    # or an element tree.
    def clean_html(self, html: _ET_DT) -> _ET_DT: ...
71+
72+
# Module-level Cleaner instance.
clean: Cleaner
# Module-level alias of that instance's bound ``clean_html`` method, so it
# shares the method's signature.
clean_html = clean.clean_html
74+
75+
# Signature of autolink(): takes an element and returns None (presumably the
# element is modified in place -- confirm against the implementation).
# Defaults written as ``...`` are not spelled out in this stub.
def autolink(
    el: HtmlElement,
    link_regexes: Iterable[Pattern[str]] = ...,
    avoid_elements: Collection[str] = ...,
    avoid_hosts: Iterable[Pattern[str]] = ...,
    avoid_classes: Collection[str] = ["nolink"],
) -> None: ...
82+
# Signature of autolink_html(): same options as autolink(), but the markup is
# passed in and a value of the same type (str, bytes or element) is returned.
# Defaults written as ``...`` are not spelled out in this stub.
def autolink_html(
    html: _DT,
    link_regexes: Iterable[Pattern[str]] = ...,
    avoid_elements: Collection[str] = ...,
    avoid_hosts: Iterable[Pattern[str]] = ...,
    avoid_classes: Collection[str] = ["nolink"],
) -> _DT: ...
89+
# Signature of word_break(): takes an element and returns None (presumably
# the element is modified in place -- confirm against the implementation).
def word_break(
    el: HtmlElement,
    max_width: int = 40,
    avoid_elements: Collection[str] = ["pre", "textarea", "code"],
    avoid_classes: Collection[str] = ["nobreak"],
    # U+200B (zero width space).  Written as a string literal because stub
    # defaults should be simple expressions; the value equals chr(0x200B).
    break_character: str = "\u200b",
) -> None: ...
96+
# Signature of word_break_html(): same options as word_break(), but the
# markup is passed in and a value of the same type (str, bytes or element)
# is returned.
def word_break_html(
    html: _DT,
    max_width: int = 40,
    avoid_elements: Collection[str] = ["pre", "textarea", "code"],
    avoid_classes: Collection[str] = ["nobreak"],
    # U+200B (zero width space).  Written as a string literal because stub
    # defaults should be simple expressions; the value equals chr(0x200B).
    break_character: str = "\u200b",
) -> _DT: ...

lxml_html_clean/py.typed

Whitespace-only changes.

0 commit comments

Comments
 (0)