|
| 1 | +from abc import ABC, abstractmethod |
| 2 | + |
| 3 | +try: |
| 4 | + import charset_normalizer |
| 5 | +except ImportError: |
| 6 | + charset_normalizer = None |
| 7 | + |
| 8 | + |
class Decoder(ABC):
    """
    Strategy interface for turning bytes into text after a failed decode.

    A concrete decoder is consulted once standard decoding has raised a
    UnicodeDecodeError. It is handed the raw bytes together with that
    original error, and either returns the decoded string or raises the
    error it was given to signal that decoding is not possible.
    """

    @abstractmethod
    def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str:
        """
        Decode ``value`` into a string.

        Args:
            value: The bytes to decode.
            decode_error: The UnicodeDecodeError raised by the original
                decoding attempt; implementations should raise it when
                decoding is not possible.

        Returns:
            The decoded string.
        """
| 26 | + |
| 27 | + |
class DefaultDecoder(Decoder):
    """
    Decoder that guesses the encoding with charset_normalizer when available.

    The UnicodeDecodeError supplied by the caller is re-raised when
    charset_normalizer could not be imported, when it reports no encoding
    for the input, or when decoding with the reported encoding fails.
    """

    def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str:
        """
        Decode ``value`` using the encoding detected by charset_normalizer.

        Args:
            value: The bytes that failed standard decoding.
            decode_error: The UnicodeDecodeError from the original attempt.

        Returns:
            ``value`` decoded with the detected encoding.

        Raises:
            UnicodeDecodeError: ``decode_error`` itself, whenever no usable
                encoding is available for ``value``.
        """
        if charset_normalizer is None:
            raise decode_error
        detected_encoding = charset_normalizer.detect(value)["encoding"]
        if detected_encoding is None:
            raise decode_error
        try:
            return value.decode(detected_encoding)
        except (UnicodeDecodeError, LookupError) as fallback_error:
            # Honor the Decoder contract: surface the caller's original error
            # rather than a different one produced by the fallback attempt
            # (bad guess, or a codec name Python does not recognize).
            raise decode_error from fallback_error
| 42 | + |
| 43 | + |
class NoopDecoder(Decoder):
    """
    Decoder that refuses to decode anything.

    Every call to ``decode`` raises the UnicodeDecodeError it was handed.
    Installing this decoder therefore turns off encoding detection and keeps
    decoding strict: failures are reported instead of being guessed around.
    """

    def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str:
        """Raise ``decode_error`` unconditionally; ``value`` is not inspected."""
        raise decode_error
| 60 | + |
| 61 | + |
class EncodingsSettings:
    """
    Holds the Decoder strategy used to decode bytes after a UnicodeDecodeError.

    A new instance starts with a DefaultDecoder. Call ``use`` to install a
    different Decoder at runtime; ``decode`` forwards to whichever decoder is
    currently installed.
    """

    def __init__(self) -> None:
        # Annotated with the base type because `use` may install any Decoder.
        self._decoder: Decoder = DefaultDecoder()

    def use(self, decoder: Decoder) -> None:
        """Install ``decoder`` as the strategy for subsequent ``decode`` calls."""
        self._decoder = decoder

    def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str:
        """
        Decode ``value`` with the currently installed decoder.

        Args:
            value: The bytes to decode.
            decode_error: The UnicodeDecodeError from the original attempt,
                passed through to the decoder unchanged.

        Returns:
            Whatever the installed decoder returns; any exception it raises
            propagates to the caller.
        """
        active_decoder = self._decoder
        return active_decoder.decode(value, decode_error)
| 89 | + |
| 90 | + |
# Module-level EncodingsSettings instance, created when this module is imported.
encodings_settings = EncodingsSettings()
0 commit comments