44
55import re
66from html .entities import name2codepoint
7- from typing import AnyStr , Iterable , Match , Optional , Pattern , Tuple , Union
7+ from typing import Iterable , Match , Optional , Pattern , Tuple , Union
88from urllib .parse import urljoin
99
1010from w3lib ._types import StrOrBytes
3434
3535
3636def replace_entities (
37- text : AnyStr ,
37+ text : StrOrBytes ,
3838 keep : Iterable [str ] = (),
3939 remove_illegal : bool = True ,
4040 encoding : str = "utf-8" ,
@@ -99,11 +99,13 @@ def convert_entity(m: Match[str]) -> str:
9999 return _ent_re .sub (convert_entity , to_unicode (text , encoding ))
100100
101101
102- def has_entities (text : AnyStr , encoding : Optional [str ] = None ) -> bool :
102+ def has_entities (text : StrOrBytes , encoding : Optional [str ] = None ) -> bool :
103103 return bool (_ent_re .search (to_unicode (text , encoding )))
104104
105105
106- def replace_tags (text : AnyStr , token : str = "" , encoding : Optional [str ] = None ) -> str :
106+ def replace_tags (
107+ text : StrOrBytes , token : str = "" , encoding : Optional [str ] = None
108+ ) -> str :
107109 """Replace all markup tags found in the given `text` by the given token.
108110 By default `token` is an empty string so it just removes all tags.
109111
@@ -129,7 +131,7 @@ def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None)
129131_REMOVECOMMENTS_RE = re .compile ("<!--.*?(?:-->|$)" , re .DOTALL )
130132
131133
132- def remove_comments (text : AnyStr , encoding : Optional [str ] = None ) -> str :
134+ def remove_comments (text : StrOrBytes , encoding : Optional [str ] = None ) -> str :
133135 """Remove HTML Comments.
134136
135137 >>> import w3lib.html
@@ -144,7 +146,7 @@ def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:
144146
145147
146148def remove_tags (
147- text : AnyStr ,
149+ text : StrOrBytes ,
148150 which_ones : Iterable [str ] = (),
149151 keep : Iterable [str ] = (),
150152 encoding : Optional [str ] = None ,
@@ -216,7 +218,7 @@ def remove_tag(m: Match[str]) -> str:
216218
217219
218220def remove_tags_with_content (
219- text : AnyStr , which_ones : Iterable [str ] = (), encoding : Optional [str ] = None
221+ text : StrOrBytes , which_ones : Iterable [str ] = (), encoding : Optional [str ] = None
220222) -> str :
221223 """Remove tags and their content.
222224
@@ -240,7 +242,7 @@ def remove_tags_with_content(
240242
241243
242244def replace_escape_chars (
243- text : AnyStr ,
245+ text : StrOrBytes ,
244246 which_ones : Iterable [str ] = ("\n " , "\t " , "\r " ),
245247 replace_by : StrOrBytes = "" ,
246248 encoding : Optional [str ] = None ,
@@ -262,7 +264,7 @@ def replace_escape_chars(
262264
263265
264266def unquote_markup (
265- text : AnyStr ,
267+ text : StrOrBytes ,
266268 keep : Iterable [str ] = (),
267269 remove_illegal : bool = True ,
268270 encoding : Optional [str ] = None ,
@@ -304,7 +306,7 @@ def _get_fragments(
304306
305307
306308def get_base_url (
307- text : AnyStr , baseurl : StrOrBytes = "" , encoding : str = "utf-8"
309+ text : StrOrBytes , baseurl : StrOrBytes = "" , encoding : str = "utf-8"
308310) -> str :
309311 """Return the base url if declared in the given HTML `text`,
310312 relative to the given base url.
@@ -324,7 +326,7 @@ def get_base_url(
324326
325327
326328def get_meta_refresh (
327- text : AnyStr ,
329+ text : StrOrBytes ,
328330 baseurl : str = "" ,
329331 encoding : str = "utf-8" ,
330332 ignore_tags : Iterable [str ] = ("script" , "noscript" ),
0 commit comments