Add encoding tests, remove DefaultDecoder (#586)

RobertoPrevato · web-flow · commit 37dc9a6a01b4 · 2025-06-22T10:53:38.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,17 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [2.3.3] - 2025-06-??
 
-- Make `charset_normalizer` an **optional** dependency. This library is
-  optional and is only used when a `UnicodeDecodeError` exception occurs when
-  parsing the body of a web request. This can happen in two circumstances:
-  when the client sends a payload specifying the wrong encoding in the
-  `Content-Type` request header, or when the client sends a payload that is not
-  `UTF-8` encoded and without specifying the charset encoding.
+- Remove `charset-normalizer` dependency. This library was used only when a
+  `UnicodeDecodeError` exception occurred when parsing the body of a web
+  request. This can happen in two circumstances: when the client sends a
+  payload specifying the wrong encoding in the `Content-Type` request header,
+  or when the client sends a payload that is not `UTF-8` encoded and without
+  specifying the charset encoding.
+- In the circumstances described above, the framework now always returns
+  `400 Bad Request` with a useful error message in the response payload.
 - Correct bug in the `parse_charset` function that prevented proper parsing and
   optimal handling of input encodings different than `UTF8`. Parsing still
-  worked in this case because of the automatic fallback to `charset_normalizer`.
-- Correct the output of `request.charset` when the charset is obtained from
-  the 'Content-Type' request header.
+  worked in this case because of the automatic fallback to
+  `charset-normalizer`.
+- Correct the output of `request.charset` when the charset is obtained from the
+  'Content-Type' request header.
 
 ## [2.3.2] - 2025-06-17 :telescope:
 
diff --git a/blacksheep/__init__.py b/blacksheep/__init__.py
@@ -4,7 +4,7 @@
 """
 
 __author__ = "Roberto Prevato <roberto.prevato@gmail.com>"
-__version__ = "2.3.2"
+__version__ = "2.3.3"
 
 from .contents import Content as Content
 from .contents import FormContent as FormContent
diff --git a/blacksheep/server/rendering/jinja2.py b/blacksheep/server/rendering/jinja2.py
@@ -15,6 +15,7 @@
 
 from blacksheep.messages import Request
 from blacksheep.server.csrf import AntiForgeryHandler, MissingRequestContextError
+from blacksheep.utils import truthy
 
 from .abc import Renderer
 
@@ -103,8 +104,8 @@ def __init__(
                 os.environ.get("APP_JINJA_PACKAGE_PATH", "views"),
             ),
             autoescape=select_autoescape(["html", "xml", "jinja"]),
-            auto_reload=bool(os.environ.get("APP_JINJA_DEBUG", "")) or debug,
-            enable_async=bool(os.environ.get("APP_JINJA_ENABLE_ASYNC", ""))
+            auto_reload=truthy(os.environ.get("APP_JINJA_DEBUG", "")) or debug,
+            enable_async=truthy(os.environ.get("APP_JINJA_ENABLE_ASYNC", ""))
             or enable_async,
         )
 
diff --git a/blacksheep/settings/encodings.py b/blacksheep/settings/encodings.py
@@ -1,10 +1,5 @@
 from abc import ABC, abstractmethod
 
-try:
-    import charset_normalizer
-except ImportError:
-    charset_normalizer = None
-
 
 class Decoder(ABC):
     """
@@ -25,22 +20,6 @@ class Decoder(ABC):
     def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str: ...
 
 
-class DefaultDecoder(Decoder):
-    """
-    Decoder implementation that attempts to detect the encoding using charset_normalizer
-    if available. If charset_normalizer is not available, it raises again the
-    UnicodeDecodeError.
-    """
-
-    def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str:
-        if charset_normalizer is None:
-            raise decode_error
-        detected_encoding = charset_normalizer.detect(value)["encoding"]
-        if detected_encoding is None:
-            raise decode_error
-        return value.decode(detected_encoding)
-
-
 class NoopDecoder(Decoder):
     """
     A decoder implementation that does not attempt to decode input bytes.
@@ -65,8 +44,9 @@ class EncodingsSettings:
 
     EncodingsSettings allows configuring which Decoder implementation is used
     to decode bytes when a UnicodeDecodeError occurs. By default, it uses
-    DefaultDecoder, which attempts to detect the encoding using charset_normalizer
-    if available. The decoder can be replaced at runtime using the `use` method.
+    NoopDecoder, which does not attempt to detect the encoding and re-raises the
+    UnicodeDecodeError for further processing. The decoder can be replaced at runtime
+    using the `use` method.
 
     Methods:
         use(decoder: Decoder) -> None:
@@ -79,7 +59,7 @@ class EncodingsSettings:
     """
 
     def __init__(self) -> None:
-        self._decoder = DefaultDecoder()
+        self._decoder = NoopDecoder()
 
     def use(self, decoder: Decoder) -> None:
         self._decoder = decoder
diff --git a/tests/test_encodings.py b/tests/test_encodings.py
@@ -0,0 +1,73 @@
+from dataclasses import dataclass
+
+from blacksheep.server.responses import ok
+from blacksheep.server.routing import Router
+from blacksheep.testing.helpers import get_example_scope
+from blacksheep.testing.messages import MockReceive, MockSend
+from tests.utils.application import FakeApplication
+
+
+@dataclass
+class Cat:  # payload model the test handlers bind the JSON request body to
+    id: int
+    name: str
+
+
+async def test_application_encoding_error_1():
+    """A body that is not valid for the declared charset yields 400 Bad Request."""
+    app = FakeApplication(router=Router())
+
+    @app.router.post("/")
+    def home(data: Cat):
+        return ok(data)
+
+    # The payload is encoded using ISO-8859-1 but the client declares UTF-8
+    scope = get_example_scope(
+        "POST",
+        "/",
+        [(b"Content-Type", b"application/json; charset=UTF-8")],
+    )
+
+    await app(
+        scope,
+        MockReceive(['{"id": 1, "name": "Café"}'.encode("ISO-8859-1")]),
+        MockSend(),
+    )
+
+    response = app.response
+    # Response status is Bad Request 400
+    assert response is not None
+    assert response.status == 400
+    # The response body names the charset that failed to decode the content
+    text = await response.text()
+    assert "Cannot decode the request content using: utf-8." in text
+
+
+async def test_application_encoding_correct_1():
+    """A correctly declared non-UTF-8 charset is honored when decoding the body."""
+    app = FakeApplication(router=Router())
+
+    @app.router.post("/")
+    def home(data: Cat):
+        return ok(data)
+
+    # The client properly declares an encoding different from UTF-8 (ISO-8859-1)
+    scope = get_example_scope(
+        "POST",
+        "/",
+        [(b"Content-Type", b"application/json; charset=ISO-8859-1")],
+    )
+
+    await app(
+        scope,
+        MockReceive(['{"id": 1, "name": "Café"}'.encode("ISO-8859-1")]),
+        MockSend(),
+    )
+
+    response = app.response
+
+    assert response is not None
+    assert response.status == 200
+    # The handler received the correctly decoded payload and echoes it back
+    text = await response.text()
+    assert text == '{"id":1,"name":"Café"}'