1+ from __future__ import annotations
2+
13import logging
24from os import PathLike
3- from typing import BinaryIO , List , Optional , Set , Union
5+ from typing import BinaryIO
46
57from .cd import (
68 coherence_ratio ,
2123 should_strip_sig_or_bom ,
2224)
2325
24- # Will most likely be controversial
25- # logging.addLevelName(TRACE, "TRACE")
2626logger = logging .getLogger ("charset_normalizer" )
2727explain_handler = logging .StreamHandler ()
2828explain_handler .setFormatter (
3131
3232
3333def from_bytes (
34- sequences : Union [ bytes , bytearray ] ,
34+ sequences : bytes | bytearray ,
3535 steps : int = 5 ,
3636 chunk_size : int = 512 ,
3737 threshold : float = 0.2 ,
38- cp_isolation : Optional [ List [ str ]] = None ,
39- cp_exclusion : Optional [ List [ str ]] = None ,
38+ cp_isolation : list [ str ] | None = None ,
39+ cp_exclusion : list [ str ] | None = None ,
4040 preemptive_behaviour : bool = True ,
4141 explain : bool = False ,
4242 language_threshold : float = 0.1 ,
@@ -62,7 +62,7 @@ def from_bytes(
6262
6363 if not isinstance (sequences , (bytearray , bytes )):
6464 raise TypeError (
65- "Expected object of type bytes or bytearray, got: {0 }" .format (
65+ "Expected object of type bytes or bytearray, got: {}" .format (
6666 type (sequences )
6767 )
6868 )
@@ -76,7 +76,7 @@ def from_bytes(
7676
7777 if length == 0 :
7878 logger .debug ("Encoding detection on empty bytes, assuming utf_8 intention." )
79- if explain :
79+ if explain : # Defensive: make sure this exit path removes the explain handler
8080 logger .removeHandler (explain_handler )
8181 logger .setLevel (previous_logger_level or logging .WARNING )
8282 return CharsetMatches ([CharsetMatch (sequences , "utf_8" , 0.0 , False , [], "" )])
@@ -135,9 +135,9 @@ def from_bytes(
135135 ),
136136 )
137137
138- prioritized_encodings : List [str ] = []
138+ prioritized_encodings : list [str ] = []
139139
140- specified_encoding : Optional [ str ] = (
140+ specified_encoding : str | None = (
141141 any_specified_encoding (sequences ) if preemptive_behaviour else None
142142 )
143143
@@ -149,13 +149,13 @@ def from_bytes(
149149 specified_encoding ,
150150 )
151151
152- tested : Set [str ] = set ()
153- tested_but_hard_failure : List [str ] = []
154- tested_but_soft_failure : List [str ] = []
152+ tested : set [str ] = set ()
153+ tested_but_hard_failure : list [str ] = []
154+ tested_but_soft_failure : list [str ] = []
155155
156- fallback_ascii : Optional [ CharsetMatch ] = None
157- fallback_u8 : Optional [ CharsetMatch ] = None
158- fallback_specified : Optional [ CharsetMatch ] = None
156+ fallback_ascii : CharsetMatch | None = None
157+ fallback_u8 : CharsetMatch | None = None
158+ fallback_specified : CharsetMatch | None = None
159159
160160 results : CharsetMatches = CharsetMatches ()
161161
@@ -189,7 +189,7 @@ def from_bytes(
189189
190190 tested .add (encoding_iana )
191191
192- decoded_payload : Optional [ str ] = None
192+ decoded_payload : str | None = None
193193 bom_or_sig_available : bool = sig_encoding == encoding_iana
194194 strip_sig_or_bom : bool = bom_or_sig_available and should_strip_sig_or_bom (
195195 encoding_iana
@@ -292,7 +292,7 @@ def from_bytes(
292292 early_stop_count : int = 0
293293 lazy_str_hard_failure = False
294294
295- md_chunks : List [str ] = []
295+ md_chunks : list [str ] = []
296296 md_ratios = []
297297
298298 try :
@@ -397,7 +397,7 @@ def from_bytes(
397397 )
398398
399399 if not is_multi_byte_decoder :
400- target_languages : List [str ] = encoding_languages (encoding_iana )
400+ target_languages : list [str ] = encoding_languages (encoding_iana )
401401 else :
402402 target_languages = mb_encoding_languages (encoding_iana )
403403
@@ -462,7 +462,7 @@ def from_bytes(
462462 "Encoding detection: %s is most likely the one." ,
463463 current_match .encoding ,
464464 )
465- if explain :
465+ if explain : # Defensive: make sure this exit path removes the explain handler
466466 logger .removeHandler (explain_handler )
467467 logger .setLevel (previous_logger_level )
468468 return CharsetMatches ([current_match ])
@@ -480,7 +480,7 @@ def from_bytes(
480480 "Encoding detection: %s is most likely the one." ,
481481 probable_result .encoding ,
482482 )
483- if explain :
483+ if explain : # Defensive: make sure this exit path removes the explain handler
484484 logger .removeHandler (explain_handler )
485485 logger .setLevel (previous_logger_level )
486486
@@ -492,7 +492,7 @@ def from_bytes(
492492 "the beginning of the sequence." ,
493493 encoding_iana ,
494494 )
495- if explain :
495+ if explain : # Defensive: make sure this exit path removes the explain handler
496496 logger .removeHandler (explain_handler )
497497 logger .setLevel (previous_logger_level )
498498 return CharsetMatches ([results [encoding_iana ]])
@@ -546,8 +546,8 @@ def from_fp(
546546 steps : int = 5 ,
547547 chunk_size : int = 512 ,
548548 threshold : float = 0.20 ,
549- cp_isolation : Optional [ List [ str ]] = None ,
550- cp_exclusion : Optional [ List [ str ]] = None ,
549+ cp_isolation : list [ str ] | None = None ,
550+ cp_exclusion : list [ str ] | None = None ,
551551 preemptive_behaviour : bool = True ,
552552 explain : bool = False ,
553553 language_threshold : float = 0.1 ,
@@ -572,12 +572,12 @@ def from_fp(
572572
573573
574574def from_path (
575- path : Union [ str , bytes , PathLike ] , # type: ignore[type-arg]
575+ path : str | bytes | PathLike , # type: ignore[type-arg]
576576 steps : int = 5 ,
577577 chunk_size : int = 512 ,
578578 threshold : float = 0.20 ,
579- cp_isolation : Optional [ List [ str ]] = None ,
580- cp_exclusion : Optional [ List [ str ]] = None ,
579+ cp_isolation : list [ str ] | None = None ,
580+ cp_exclusion : list [ str ] | None = None ,
581581 preemptive_behaviour : bool = True ,
582582 explain : bool = False ,
583583 language_threshold : float = 0.1 ,
@@ -603,12 +603,12 @@ def from_path(
603603
604604
605605def is_binary (
606- fp_or_path_or_payload : Union [ PathLike , str , BinaryIO , bytes ] , # type: ignore[type-arg]
606+ fp_or_path_or_payload : PathLike | str | BinaryIO | bytes , # type: ignore[type-arg]
607607 steps : int = 5 ,
608608 chunk_size : int = 512 ,
609609 threshold : float = 0.20 ,
610- cp_isolation : Optional [ List [ str ]] = None ,
611- cp_exclusion : Optional [ List [ str ]] = None ,
610+ cp_isolation : list [ str ] | None = None ,
611+ cp_exclusion : list [ str ] | None = None ,
612612 preemptive_behaviour : bool = True ,
613613 explain : bool = False ,
614614 language_threshold : float = 0.1 ,
0 commit comments