Skip to content

Commit 985b962

Browse files
authored
fix(utils.py): specify utf-8 encoding when reading file (#1268)
1 parent 2dedd9f commit 985b962

File tree

3 files changed

+193
-5
lines changed

3 files changed

+193
-5
lines changed
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import builtins
2+
import logging
3+
import pathlib
4+
5+
from twine import utils
6+
7+
8+
def _write_utf8_ini(path, username: str = "テストユーザー🐍") -> None:
    """Create a minimal ini file at *path*, stored as UTF-8 bytes.

    The file contains a single ``[server-login]`` section with the given
    *username* and a fixed password. The default username mixes Japanese
    text with an emoji so that decoding the file under a non-UTF-8 locale
    encoding (such as cp932) is more likely to fail.
    """
    ini_lines = [
        "[server-login]",
        f"username = {username}",
        "password = secret",
    ]
    # Encode by hand and write raw bytes so the on-disk encoding never
    # depends on the platform default; a reader that assumes a different
    # encoding will then fail to decode the file.
    payload = ("\n".join(ini_lines) + "\n").encode("utf-8")
    path.write_bytes(payload)
21+
22+
23+
def test_parse_config_triggers_utf8_fallback(monkeypatch, caplog, tmp_path):
    """Test UTF-8 fallback when UnicodeDecodeError is raised.

    If the first read of the file raises a UnicodeDecodeError, _parse_config
    should take the UTF-8 fallback path. This test simulates a decode failure
    on the first I/O against the config file and then allows normal I/O so
    the fallback is exercised.
    """
    ini_path = tmp_path / "pypirc"
    expected_username = "テストユーザー🐍"
    _write_utf8_ini(ini_path, expected_username)

    # Coordinate a single "raise once" behavior across multiple common I/O
    # entrypoints.
    call = {"n": 0}
    original_open = builtins.open
    original_read_text = pathlib.Path.read_text
    target = str(ini_path)

    def fail_first_read():
        # Raise only for the very first attempted read of the config file.
        if call["n"] == 0:
            call["n"] += 1
            # UnicodeDecodeError(encoding, object, start, end, reason)
            raise UnicodeDecodeError("utf-8", b"", 0, 1, "simulated")

    def open_raise_once(file, *args, **kwargs):
        # Only interfere with opens of the config file under test so that
        # unrelated I/O (logging, pytest internals, ...) can neither consume
        # the simulated failure nor be broken by it.
        if str(file) == target:
            fail_first_read()
        return original_open(file, *args, **kwargs)

    def read_text_raise_once(self, *args, **kwargs):
        # Forward arguments untouched so the wrapper stays compatible with
        # every Path.read_text signature (Python 3.13 added ``newline``).
        if str(self) == target:
            fail_first_read()
        return original_read_text(self, *args, **kwargs)

    # Patch both builtins.open and pathlib.Path.read_text to be robust
    # against whichever API _parse_config uses internally.
    monkeypatch.setattr(builtins, "open", open_raise_once)
    monkeypatch.setattr(pathlib.Path, "read_text", read_text_raise_once, raising=True)

    caplog.set_level(logging.INFO, logger="twine")
    parser = utils._parse_config(str(ini_path))

    # Ensure the parsed result is correct (file was read as UTF-8 after
    # fallback)
    assert parser.get("server-login", "username") == expected_username

    # Ensure a log message indicating the fallback is present
    assert (
        "Configuration file not readable with default locale encoding, "
        "trying UTF-8" in caplog.text
    )
74+
75+
76+
def test_parse_config_no_fallback_when_default_utf8(monkeypatch, caplog, tmp_path):
    """Test normal parsing when default encoding is UTF-8.

    When the default encoding is UTF-8, no fallback is necessary and the
    file should be parsed via the normal path. To make this deterministic
    across Python versions/environments, force I/O calls without an explicit
    encoding to use UTF-8 by wrapping open / Path.read_text.
    """
    ini_path = tmp_path / "pypirc"
    expected_username = "テストユーザー🐍"
    _write_utf8_ini(ini_path, expected_username)

    # Wrap builtins.open so that if encoding is not provided, we force
    # utf-8.
    original_open = builtins.open

    def open_force_utf8(
        file,
        mode="r",
        buffering=-1,
        encoding=None,
        errors=None,
        newline=None,
        closefd=True,
        opener=None,
    ):
        # Text-mode opens without an explicit encoding default to UTF-8;
        # binary opens and explicit encodings are passed through unchanged.
        if encoding is None and "b" not in mode:
            encoding = "utf-8"
        return original_open(
            file,
            mode,
            buffering=buffering,
            encoding=encoding,
            errors=errors,
            newline=newline,
            closefd=closefd,
            opener=opener,
        )

    # Wrap pathlib.Path.read_text similarly
    original_read_text = pathlib.Path.read_text

    def read_text_force_utf8(self, encoding=None, errors=None, **kwargs):
        # Extra keyword arguments (e.g. ``newline``, added to Path.read_text
        # in Python 3.13) are forwarded instead of raising TypeError.
        if encoding is None:
            encoding = "utf-8"
        return original_read_text(self, encoding=encoding, errors=errors, **kwargs)

    monkeypatch.setattr(builtins, "open", open_force_utf8)
    monkeypatch.setattr(pathlib.Path, "read_text", read_text_force_utf8, raising=True)

    caplog.set_level(logging.INFO, logger="twine")
    parser = utils._parse_config(str(ini_path))

    # Ensure the parsed result is correct
    assert parser.get("server-login", "username") == expected_username

    # Verify that the used configuration file path is present in the logs.
    assert f"Using configuration from {ini_path}" in caplog.text

twine/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,9 @@ class InvalidPyPIUploadURL(TwineException):
173173
"""
174174

175175
pass
176+
177+
178+
class UnableToReadConfigurationFile(TwineException):
    """Raised when a configuration file is present but cannot be read.

    A typical cause is a file whose contents cannot be decoded, i.e. an
    encoding issue.
    """

    pass

twine/utils.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,25 +50,64 @@
5050
logger = logging.getLogger(__name__)
5151

5252

53+
def _parse_file(path: str, **open_kwargs: Any) -> configparser.RawConfigParser:
    """Parse the configuration file at *path* into a brand-new parser.

    *open_kwargs* are forwarded verbatim to ``open()`` (for example
    ``encoding="utf-8"``). Errors raised while opening or reading the file,
    such as ``UnicodeDecodeError``, propagate to the caller. Because every
    call builds its own parser and only returns it after a complete read, a
    failed attempt never hands back a partially populated parser.
    """
    with open(path, **open_kwargs) as config_file:
        fresh_parser = configparser.RawConfigParser()
        fresh_parser.read_file(config_file)
    return fresh_parser
64+
65+
66+
def _parse_config(path: str) -> configparser.RawConfigParser:
    """Parse a config file with a UTF-8 fallback on decode errors.

    Try to parse using the default system encoding first; if a
    UnicodeDecodeError occurs, retry using UTF-8 and log that a fallback
    was used.

    The "Using configuration from ..." message is logged only once the file
    has actually been parsed, so it is not emitted for a file that is
    missing or unreadable.

    Raises:
        exceptions.UnableToReadConfigurationFile: if the file cannot be
            decoded with the default encoding nor with UTF-8.

    Other errors raised while opening the file (e.g. FileNotFoundError) are
    not handled here and propagate to the caller.
    """
    try:
        parser = _parse_file(path)
    except UnicodeDecodeError:
        logger.info(
            "Configuration file not readable with default locale encoding, trying UTF-8"
        )
        try:
            parser = _parse_file(path, encoding="utf-8")
        except UnicodeDecodeError as ude:
            raise exceptions.UnableToReadConfigurationFile(
                f"Unable to read configuration file: {path}"
            ) from ude

    # Reached only after one of the two parse attempts succeeded.
    logger.info("Using configuration from %s", path)
    return parser
91+
92+
5393
def get_config(path: str) -> Dict[str, RepositoryConfig]:
5494
"""Read repository configuration from a file (i.e. ~/.pypirc).
5595
5696
Format: https://packaging.python.org/specifications/pypirc/
5797
5898
If the default config file doesn't exist, return a default configuration for
59-
pypyi and testpypi.
99+
pypi and testpypi.
60100
"""
61101
realpath = os.path.realpath(os.path.expanduser(path))
62-
parser = configparser.RawConfigParser()
63102

64103
try:
65-
with open(realpath) as f:
66-
parser.read_file(f)
67-
logger.info(f"Using configuration from {realpath}")
104+
parser = _parse_config(realpath)
68105
except FileNotFoundError:
69106
# User probably set --config-file, but the file can't be read
70107
if path != DEFAULT_CONFIG_FILE:
71108
raise
109+
# Create empty parser for missing default config file
110+
parser = configparser.RawConfigParser()
72111

73112
# server-login is obsolete, but retained for backwards compatibility
74113
defaults: RepositoryConfig = {

0 commit comments

Comments
 (0)