Skip to content

Commit 78c2c48

Browse files
itxasos23 and Zac-HD
authored
Handle NFC/NFD strings that normalize to the same string. (#10355)
Co-authored-by: Zac Hatfield-Dodds <[email protected]>
1 parent 8a40fc5 commit 78c2c48

File tree

5 files changed

+53
-10
lines changed

5 files changed

+53
-10
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ Ian Bicking
154154
Ian Lesperance
155155
Ilya Konstantinov
156156
Ionuț Turturică
157+
Itxaso Aizpurua
157158
Iwan Briquemont
158159
Jaap Broekhuizen
159160
Jakob van Santen

changelog/3426.improvement.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their utf-8 representation is expressed instead.

src/_pytest/_io/saferepr.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
4141
information on exceptions raised during the call.
4242
"""
4343

44-
def __init__(self, maxsize: Optional[int]) -> None:
44+
def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
4545
"""
4646
:param maxsize:
4747
If not None, will truncate the resulting repr to that specific size, using ellipsis
@@ -54,10 +54,15 @@ def __init__(self, maxsize: Optional[int]) -> None:
5454
# truncation.
5555
self.maxstring = maxsize if maxsize is not None else 1_000_000_000
5656
self.maxsize = maxsize
57+
self.use_ascii = use_ascii
5758

5859
def repr(self, x: object) -> str:
5960
try:
60-
s = super().repr(x)
61+
if self.use_ascii:
62+
s = ascii(x)
63+
else:
64+
s = super().repr(x)
65+
6166
except (KeyboardInterrupt, SystemExit):
6267
raise
6368
except BaseException as exc:
@@ -94,7 +99,9 @@ def safeformat(obj: object) -> str:
9499
DEFAULT_REPR_MAX_SIZE = 240
95100

96101

97-
def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str:
102+
def saferepr(
103+
obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
104+
) -> str:
98105
"""Return a size-limited safe repr-string for the given object.
99106
100107
Failing __repr__ functions of user instances will be represented
@@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
104111
This function is a wrapper around the Repr/reprlib functionality of the
105112
stdlib.
106113
"""
107-
return SafeRepr(maxsize).repr(obj)
114+
115+
return SafeRepr(maxsize, use_ascii).repr(obj)
108116

109117

110-
def saferepr_unlimited(obj: object) -> str:
118+
def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
111119
"""Return an unlimited-size safe repr-string for the given object.
112120
113121
As with saferepr, failing __repr__ functions of user instances
@@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str:
119127
when maxsize=None, but that might affect some other code.
120128
"""
121129
try:
130+
if use_ascii:
131+
return ascii(obj)
122132
return repr(obj)
123133
except Exception as exc:
124134
return _format_repr_exception(exc, obj)

src/_pytest/assertion/util.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Mapping
1111
from typing import Optional
1212
from typing import Sequence
13+
from unicodedata import normalize
1314

1415
import _pytest._code
1516
from _pytest import outcomes
@@ -156,20 +157,32 @@ def has_default_eq(
156157
return True
157158

158159

159-
def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]:
160+
def assertrepr_compare(
161+
config, op: str, left: Any, right: Any, use_ascii: bool = False
162+
) -> Optional[List[str]]:
160163
"""Return specialised explanations for some operators/operands."""
161164
verbose = config.getoption("verbose")
165+
166+
# Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
167+
# See issue #3426.
168+
use_ascii = (
169+
isinstance(left, str)
170+
and isinstance(right, str)
171+
and normalize("NFD", left) == normalize("NFD", right)
172+
)
173+
162174
if verbose > 1:
163-
left_repr = saferepr_unlimited(left)
164-
right_repr = saferepr_unlimited(right)
175+
left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
176+
right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
165177
else:
166178
# XXX: "15 chars indentation" is wrong
167179
# ("E AssertionError: assert "); should use term width.
168180
maxsize = (
169181
80 - 15 - len(op) - 2
170182
) // 2 # 15 chars indentation, 1 space around op
171-
left_repr = saferepr(left, maxsize=maxsize)
172-
right_repr = saferepr(right, maxsize=maxsize)
183+
184+
left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
185+
right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)
173186

174187
summary = f"{left_repr} {op} {right_repr}"
175188

testing/test_assertion.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,24 @@ def test_mojibake(self) -> None:
776776
msg = "\n".join(expl)
777777
assert msg
778778

779+
def test_nfc_nfd_same_string(self) -> None:
780+
# issue 3426
781+
left = "hyv\xe4"
782+
right = "hyva\u0308"
783+
expl = callequal(left, right)
784+
assert expl == [
785+
r"'hyv\xe4' == 'hyva\u0308'",
786+
f"- {str(right)}",
787+
f"+ {str(left)}",
788+
]
789+
790+
expl = callequal(left, right, verbose=2)
791+
assert expl == [
792+
r"'hyv\xe4' == 'hyva\u0308'",
793+
f"- {str(right)}",
794+
f"+ {str(left)}",
795+
]
796+
779797

780798
class TestAssert_reprcompare_dataclass:
781799
def test_dataclasses(self, pytester: Pytester) -> None:

0 commit comments

Comments
 (0)