Skip to content

Commit 37dc9a6

Browse files
Add encoding tests, remove DefaultDecoder (#586)
1 parent 6801abe commit 37dc9a6

File tree

5 files changed

+93
-36
lines changed

5 files changed

+93
-36
lines changed

CHANGELOG.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [2.3.3] - 2025-06-??
99

10-
- Make `charset_normalizer` an **optional** dependency. This library is
11-
optional and is only used when a `UnicodeDecodeError` exception occurs when
12-
parsing the body of a web request. This can happen in two circumstances:
13-
when the client sends a payload specifying the wrong encoding in the
14-
`Content-Type` request header, or when the client sends a payload that is not
15-
`UTF-8` encoded and without specifying the charset encoding.
10+
- Remove `charset-normalizer` dependency. This library was used only when a
11+
`UnicodeDecodeError` exception occurred when parsing the body of a web
12+
request. This can happen in two circumstances: when the client sends a
13+
payload specifying the wrong encoding in the `Content-Type` request header,
14+
or when the client sends a payload that is not `UTF-8` encoded and does not
15+
specify the charset encoding.
16+
- Now the framework always returns `Bad Request` with a useful error message
17+
in the response payload, in the circumstances described in the point above.
1618
- Correct bug in the `parse_charset` function that prevented proper parsing and
1719
optimal handling of input encodings other than `UTF-8`. Parsing still
18-
worked in this case because of the automatic fallback to `charset_normalizer`.
19-
- Correct the output of `request.charset` when the charset is obtained from
20-
the 'Content-Type' request header.
20+
worked in this case because of the automatic fallback to
21+
`charset-normalizer`.
22+
- Correct the output of `request.charset` when the charset is obtained from the
23+
'Content-Type' request header.
2124

2225
## [2.3.2] - 2025-06-17 :telescope:
2326

blacksheep/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""
55

66
__author__ = "Roberto Prevato <[email protected]>"
7-
__version__ = "2.3.2"
7+
__version__ = "2.3.3"
88

99
from .contents import Content as Content
1010
from .contents import FormContent as FormContent

blacksheep/server/rendering/jinja2.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from blacksheep.messages import Request
1717
from blacksheep.server.csrf import AntiForgeryHandler, MissingRequestContextError
18+
from blacksheep.utils import truthy
1819

1920
from .abc import Renderer
2021

@@ -103,8 +104,8 @@ def __init__(
103104
os.environ.get("APP_JINJA_PACKAGE_PATH", "views"),
104105
),
105106
autoescape=select_autoescape(["html", "xml", "jinja"]),
106-
auto_reload=bool(os.environ.get("APP_JINJA_DEBUG", "")) or debug,
107-
enable_async=bool(os.environ.get("APP_JINJA_ENABLE_ASYNC", ""))
107+
auto_reload=truthy(os.environ.get("APP_JINJA_DEBUG", "")) or debug,
108+
enable_async=truthy(os.environ.get("APP_JINJA_ENABLE_ASYNC", ""))
108109
or enable_async,
109110
)
110111

blacksheep/settings/encodings.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
from abc import ABC, abstractmethod
22

3-
try:
4-
import charset_normalizer
5-
except ImportError:
6-
charset_normalizer = None
7-
83

94
class Decoder(ABC):
105
"""
@@ -25,22 +20,6 @@ class Decoder(ABC):
2520
def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str: ...
2621

2722

28-
class DefaultDecoder(Decoder):
29-
"""
30-
Decoder implementation that attempts to detect the encoding using charset_normalizer
31-
if available. If charset_normalizer is not available, it raises again the
32-
UnicodeDecodeError.
33-
"""
34-
35-
def decode(self, value: bytes, decode_error: UnicodeDecodeError) -> str:
36-
if charset_normalizer is None:
37-
raise decode_error
38-
detected_encoding = charset_normalizer.detect(value)["encoding"]
39-
if detected_encoding is None:
40-
raise decode_error
41-
return value.decode(detected_encoding)
42-
43-
4423
class NoopDecoder(Decoder):
4524
"""
4625
A decoder implementation that does not attempt to decode input bytes.
@@ -65,8 +44,9 @@ class EncodingsSettings:
6544
6645
EncodingsSettings allows configuring which Decoder implementation is used
6746
to decode bytes when a UnicodeDecodeError occurs. By default, it uses
68-
DefaultDecoder, which attempts to detect the encoding using charset_normalizer
69-
if available. The decoder can be replaced at runtime using the `use` method.
47+
NoopDecoder, which does not attempt to detect the encoding and re-raises the
48+
UnicodeDecodeError for further processing. The decoder can be replaced at runtime
49+
using the `use` method.
7050
7151
Methods:
7252
use(decoder: Decoder) -> None:
@@ -79,7 +59,7 @@ class EncodingsSettings:
7959
"""
8060

8161
def __init__(self) -> None:
82-
self._decoder = DefaultDecoder()
62+
self._decoder = NoopDecoder()
8363

8464
def use(self, decoder: Decoder) -> None:
8565
self._decoder = decoder

tests/test_encodings.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from dataclasses import dataclass
2+
3+
from blacksheep.server.responses import ok
4+
from blacksheep.server.routing import Router
5+
from blacksheep.testing.helpers import get_example_scope
6+
from blacksheep.testing.messages import MockReceive, MockSend
7+
from tests.utils.application import FakeApplication
8+
9+
10+
@dataclass
11+
class Cat:
12+
id: int
13+
name: str
14+
15+
16+
async def test_application_encoding_error_1():
17+
app = FakeApplication(router=Router())
18+
19+
@app.router.post("/")
20+
def home(data: Cat):
21+
return ok(data)
22+
23+
# Simulate a request where the client declares a wrong encoding:
24+
# the payload is encoded using ISO-8859-1 but the client declares UTF-8
25+
scope = get_example_scope(
26+
"POST",
27+
"/",
28+
[(b"Content-Type", b"Content-Type: application/json; charset=UTF-8")],
29+
)
30+
31+
await app(
32+
scope,
33+
MockReceive(['{"id": 1, "name": "Café"}'.encode("ISO-8859-1")]),
34+
MockSend(),
35+
)
36+
37+
response = app.response
38+
# Response status is Bad Request 400
39+
assert response is not None
40+
assert response.status == 400
41+
# The response body contains useful information
42+
text = await response.text()
43+
assert "Cannot decode the request content using: utf-8." in text
44+
45+
46+
async def test_application_encoding_correct_1():
47+
app = FakeApplication(router=Router())
48+
49+
@app.router.post("/")
50+
def home(data: Cat):
51+
return ok(data)
52+
53+
# Simulate a request where the client properly declares an encoding other than
54+
# UTF-8
55+
scope = get_example_scope(
56+
"POST",
57+
"/",
58+
[(b"Content-Type", b"Content-Type: application/json; charset=ISO-8859-1")],
59+
)
60+
61+
await app(
62+
scope,
63+
MockReceive(['{"id": 1, "name": "Café"}'.encode("ISO-8859-1")]),
64+
MockSend(),
65+
)
66+
67+
response = app.response
68+
69+
assert response is not None
70+
assert response.status == 200
71+
# The response body echoes the posted data as JSON
72+
text = await response.text()
73+
assert '{"id":1,"name":"Café"}' == text

0 commit comments

Comments
 (0)