Skip to content
24 changes: 17 additions & 7 deletions pytest_doctestplus/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ def pytest_addoption(parser):
"This is no longer recommended, use --doctest-glob instead."
))

parser.addoption("--text-file-encoding", action="store",
help="Specify encoding for files.",
default="utf-8")

# Defaults to `atol` parameter from `numpy.allclose`.
parser.addoption("--doctest-plus-atol", action="store",
help="set the absolute tolerance for float comparison",
Expand Down Expand Up @@ -143,6 +147,9 @@ def pytest_addoption(parser):
"Default format for docs. "
"This is no longer recommended, use --doctest-glob instead.")

parser.addini("text_file_encoding",
"Default encoding for text files.", default=None)

parser.addini("doctest_optionflags", "option flags for doctests",
type="args", default=["ELLIPSIS", "NORMALIZE_WHITESPACE"],)

Expand Down Expand Up @@ -444,7 +451,7 @@ def parse(self, s, name=None):
continue

if config.getoption('remote_data', 'none') != 'any':
if any(re.match(fr'{comment_char}\s+doctest-remote-data-all\s*::', x.strip())
if any(re.match(fr'{comment_char}\s+doctest-remote-data-all\s*::', x.strip()) # noqa: E501
for x in lines):
skip_all = True
continue
Expand Down Expand Up @@ -912,13 +919,13 @@ def test_filter(test):
return tests


def write_modified_file(fname, new_fname, changes):
def write_modified_file(fname, new_fname, changes, encoding=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the default here be consistent with addoption default (utf-8)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be consistent, though I'm not sure if we should change the default to utf-8 or leave as is now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to set encoding=None to avoid changing the behavior of the write_modified_file function since I am not familiar with your codebase. I wanted to ensure that the function behaves as it did before.

From a developer's perspective, I would prefer using "utf-8" as the default encoding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are any changes required here?

I think the default value of `encoding` in write_modified_file is not critical:

the CLI option defaults to "utf-8", and that value is what gets passed on to the function.

# Sort in reversed order to edit the lines:
bad_tests = []
changes.sort(key=lambda x: (x["test_lineno"], x["example_lineno"]),
reverse=True)

with open(fname) as f:
with open(fname, encoding=encoding) as f:
text = f.readlines()

for change in changes:
Expand All @@ -939,7 +946,7 @@ def write_modified_file(fname, new_fname, changes):

text[lineno:lineno+want.count("\n")] = [got]

with open(new_fname, "w") as f:
with open(new_fname, "w", encoding=encoding) as f:
f.write("".join(text))

return bad_tests
Expand All @@ -954,6 +961,9 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
if not diff_mode:
return # we do not report or apply diffs

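# Encoding used to open files: the `text_file_encoding` ini value when it is
# set (its default is None), otherwise the `--text-file-encoding` option
# (which defaults to "utf-8").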
encoding = config.getini("text_file_encoding") or config.getoption("text_file_encoding")

if diff_mode != "overwrite":
# In this mode, we write a corrected file to a temporary folder in
# order to compare them (rather than modifying the file).
Expand All @@ -974,14 +984,14 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
new_fname = fname.replace(common_path, tmpdirname)
os.makedirs(os.path.split(new_fname)[0], exist_ok=True)

bad_tests = write_modified_file(fname, new_fname, changes)
bad_tests = write_modified_file(fname, new_fname, changes, encoding)
all_bad_tests.extend(bad_tests)

# git diff returns 1 to signal changes, so just ignore the
# exit status:
with subprocess.Popen(
["git", "diff", "-p", "--no-index", fname, new_fname],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) as p:
stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=encoding) as p:
p.wait()
# Diff should be fine, but write error if not:
diff = p.stderr.read()
Expand Down Expand Up @@ -1013,7 +1023,7 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
return
terminalreporter.write_line("Applied fix to the following files:")
for fname, changes in changesets.items():
bad_tests = write_modified_file(fname, fname, changes)
bad_tests = write_modified_file(fname, fname, changes, encoding)
all_bad_tests.extend(bad_tests)
terminalreporter.write_line(f" {fname}")

Expand Down
1 change: 1 addition & 0 deletions pytest_doctestplus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from importlib.metadata import distribution
from packaging.requirements import Requirement


class ModuleChecker:

def find_module(self, module):
Expand Down
146 changes: 146 additions & 0 deletions tests/test_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import locale
from pathlib import Path
from textwrap import dedent
from typing import Callable, Optional, Tuple

import pytest

# Load pytest's ``pytester`` plugin; it supplies the ``testdir`` fixture that
# the tests in this module request.
pytest_plugins = ["pytester"]


@pytest.fixture(
    params=[
        pytest.param(("A", "a", "utf-8"), id="Aa-utf8"),
        pytest.param(("☆", "★", "utf-8"), id="star-utf8"),
        pytest.param(("b", "B", "cp1252"), id="bB-cp1252"),
        pytest.param(("☁", "☀", "utf-8"), id="cloud-utf8"),
    ],
)
def charset(request):
    """Return one ``(a, b, encoding)`` triple per parametrized case.

    The tests in this module unpack the triple and hand ``a`` and ``b`` to
    the ``basic_file`` factory; ``encoding`` names the codec for that case.
    """
    return request.param


@pytest.fixture()
def basic_file(
    tmp_path: Path,
) -> Callable[[str, str, Optional[str]], Tuple[str, str, str]]:
    """Provide a factory that writes a small module with one failing doctest.

    The returned callable takes ``(a, b, encoding)``: the doctest prints
    ``a`` while its recorded output is ``b``.  The module is written to
    ``tmp_path / "test_basic.py"`` using ``encoding``; passing ``None``
    leaves the choice of encoding to ``Path.write_text``.

    The callable returns ``(path, expected_diff, expected_result)``: the
    file path as a ``str``, the diff fragment expected in the console
    output, and the module text expected once the recorded output has been
    corrected to ``a``.
    """

    def makebasicfile(
        a: str, b: str, encoding: Optional[str]
    ) -> Tuple[str, str, str]:
        """Alternative implementation without the use of `testdir.makepyfile`."""
        # The body must stay indented relative to ``def f():`` after
        # dedent(); otherwise the generated file is not valid Python.
        content = """
            def f():
                '''
                >>> print('{}')
                {}
                '''
                pass
            """

        original = dedent(content.format(a, b))
        expected_result = dedent(content.format(a, a))

        original_file = tmp_path.joinpath("test_basic.py")
        original_file.write_text(original, encoding=encoding)

        # Unified-diff lines: one prefix character (" ", "-" or "+") followed
        # by the file line, which carries the 4-space docstring indentation.
        # Built from explicit lines so no dedent/strip step can alter them.
        expected_diff = "\n".join(
            (
                f"     >>> print('{a}')",
                f"-    {b}",
                f"+    {a}",
            )
        )

        return str(original_file), expected_diff, expected_result

    return makebasicfile


def test_basic_file_encoding_diff(testdir, capsys, basic_file, charset):
    """
    Check that the diff printed to the console matches the expected one
    when the file encoding is passed via ``--text-file-encoding``.
    """
    printed, recorded, codec = charset
    path, expected_diff, _ = basic_file(printed, recorded, codec)

    cli_args = [
        path,
        "--doctest-plus-generate-diff",
        "--text-file-encoding",
        codec,
    ]
    testdir.inline_run(*cli_args)

    captured = capsys.readouterr()
    assert expected_diff in captured.out


def test_basic_file_encoding_overwrite(testdir, basic_file, charset):
    """
    Check that "overwrite" mode rewrites the file in place with the
    expected content when the encoding is given explicitly.
    """
    printed, recorded, codec = charset
    path, _, expected_text = basic_file(printed, recorded, codec)

    cli_args = [
        path,
        "--doctest-plus-generate-diff",
        "overwrite",
        "--text-file-encoding",
        codec,
    ]
    testdir.inline_run(*cli_args)

    rewritten = Path(path).read_text(encoding=codec)
    assert expected_text in rewritten


def test_legacy_diff(testdir, capsys, basic_file, charset):
    """
    Check the console diff when no ``--text-file-encoding`` is passed.

    The test file is written without an explicit encoding.  When the
    characters of the current case cannot be encoded that way (the original
    author notes this happens on Windows, where the default is cp1252), the
    test is marked as an expected failure instead of erroring out.
    """
    printed, recorded, _ = charset

    try:
        path, expected_diff, _ = basic_file(printed, recorded, None)
    except UnicodeEncodeError:
        encoding = locale.getpreferredencoding(False)
        reason = f"could not encode {repr(charset)} with {encoding=}"
        pytest.xfail(reason=reason)

    testdir.inline_run(path, "--doctest-plus-generate-diff")

    captured = capsys.readouterr()
    assert expected_diff in captured.out


def test_legacy_overwrite(testdir, basic_file, charset):
    """
    Check "overwrite" mode when no ``--text-file-encoding`` is passed.

    The test file is written without an explicit encoding.  When the
    characters of the current case cannot be encoded that way (the original
    author notes this happens on Windows, where the default is cp1252), the
    test is marked as an expected failure instead of erroring out.
    """
    printed, recorded, _encoding = charset

    try:
        path, _, expected_text = basic_file(printed, recorded, None)
    except UnicodeEncodeError:
        encoding = locale.getpreferredencoding(False)
        reason = f"could not encode {repr(charset)} with {encoding=}"
        pytest.xfail(reason=reason)

    cli_args = [path, "--doctest-plus-generate-diff", "overwrite"]
    testdir.inline_run(*cli_args)

    # The result is read back with the encoding named by the charset case.
    rewritten = Path(path).read_text(encoding=_encoding)
    assert expected_text in rewritten