Skip to content

Commit bb26603

Browse files
test: enhance quote standardization tests with additional Unicode scenarios
1 parent c0c3fd6 commit bb26603

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

unstructured/metrics/text_extraction.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -174,9 +174,9 @@ def standardize_quotes(text: str) -> str:
174 174
"""
175 175
# Double Quotes Dictionary
176 176
double_quotes = {
177-
'"': "U+0022", # Standard typewriter/programmer's quote
178-
'"': "U+201C", # Left double quotation mark
179-
'"': "U+201D", # Right double quotation mark
177+
'"': "U+0022", # noqa 601 # Standard typewriter/programmer's quote
178+
'"': "U+201C", # noqa 601 # Left double quotation mark
179+
'"': "U+201D", # noqa 601 # Right double quotation mark
180 180
"„": "U+201E", # Double low-9 quotation mark
181 181
"‟": "U+201F", # Double high-reversed-9 quotation mark
182 182
"«": "U+00AB", # Left-pointing double angle quotation mark
@@ -196,9 +196,9 @@ def standardize_quotes(text: str) -> str:
196 196

197 197
# Single Quotes Dictionary
198 198
single_quotes = {
199-
"'": "U+0027", # Standard typewriter/programmer's quote
200-
"'": "U+2018", # Left single quotation mark
201-
"'": "U+2019", # Right single quotation mark
199+
"'": "U+0027", # noqa 601 # Standard typewriter/programmer's quote
200+
"'": "U+2018", # noqa 601 # Left single quotation mark
201+
"'": "U+2019", # noqa 601 # Right single quotation mark # noqa: W605
202 202
"‚": "U+201A", # Single low-9 quotation mark
203 203
"‛": "U+201B", # Single high-reversed-9 quotation mark
204 204
"‹": "U+2039", # Single left-pointing angle quotation mark

0 commit comments

Comments
 (0)