scientific-python · bsipocz · Apr 11, 2025 · Mar 18, 2025 · Mar 18, 2025 · Mar 19, 2025
diff --git a/pytest_doctestplus/plugin.py b/pytest_doctestplus/plugin.py
@@ -112,6 +112,10 @@ def pytest_addoption(parser):
                          "This is no longer recommended, use --doctest-glob instead."
                      ))
 
+    parser.addoption("--text-file-encoding", action="store",
+                     help="Specify encoding for files.",
+                     default="utf-8")
+
     # Defaults to `atol` parameter from `numpy.allclose`.
     parser.addoption("--doctest-plus-atol", action="store",
                      help="set the absolute tolerance for float comparison",
@@ -143,6 +147,9 @@ def pytest_addoption(parser):
                   "Default format for docs. "
                   "This is no longer recommended, use --doctest-glob instead.")
 
+    parser.addini("text_file_encoding",
+                  "Default encoding for text files.", default=None)
+
     parser.addini("doctest_optionflags", "option flags for doctests",
                   type="args", default=["ELLIPSIS", "NORMALIZE_WHITESPACE"],)
 
@@ -444,7 +451,7 @@ def parse(self, s, name=None):
                         continue
 
                     if config.getoption('remote_data', 'none') != 'any':
-                        if any(re.match(fr'{comment_char}\s+doctest-remote-data-all\s*::', x.strip())
+                        if any(re.match(fr'{comment_char}\s+doctest-remote-data-all\s*::', x.strip())  # noqa: E501
                                for x in lines):
                             skip_all = True
                             continue
@@ -912,13 +919,13 @@ def test_filter(test):
         return tests
 
 
-def write_modified_file(fname, new_fname, changes):
+def write_modified_file(fname, new_fname, changes, encoding=None):
     # Sort in reversed order to edit the lines:
     bad_tests = []
     changes.sort(key=lambda x: (x["test_lineno"], x["example_lineno"]),
                  reverse=True)
 
-    with open(fname) as f:
+    with open(fname, encoding=encoding) as f:
         text = f.readlines()
 
     for change in changes:
@@ -939,7 +946,7 @@ def write_modified_file(fname, new_fname, changes):
 
         text[lineno:lineno+want.count("\n")] = [got]
 
-    with open(new_fname, "w") as f:
+    with open(new_fname, "w", encoding=encoding) as f:
         f.write("".join(text))
 
     return bad_tests
@@ -954,6 +961,9 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
     if not diff_mode:
         return  # we do not report or apply diffs
 
+    # Encoding for file I/O: the ini value if set, otherwise the CLI option (default "utf-8").
+    encoding = config.getini("text_file_encoding") or config.getoption("text_file_encoding")
+
     if diff_mode != "overwrite":
         # In this mode, we write a corrected file to a temporary folder in
         # order to compare them (rather than modifying the file).
@@ -974,14 +984,14 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
                 new_fname = fname.replace(common_path, tmpdirname)
                 os.makedirs(os.path.split(new_fname)[0], exist_ok=True)
 
-                bad_tests = write_modified_file(fname, new_fname, changes)
+                bad_tests = write_modified_file(fname, new_fname, changes, encoding)
                 all_bad_tests.extend(bad_tests)
 
                 # git diff returns 1 to signal changes, so just ignore the
                 # exit status:
                 with subprocess.Popen(
                         ["git", "diff", "-p", "--no-index", fname, new_fname],
-                        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) as p:
+                        stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=encoding) as p:
                     p.wait()
                     # Diff should be fine, but write error if not:
                     diff = p.stderr.read()
@@ -1013,7 +1023,7 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
             return
         terminalreporter.write_line("Applied fix to the following files:")
         for fname, changes in changesets.items():
-            bad_tests = write_modified_file(fname, fname, changes)
+            bad_tests = write_modified_file(fname, fname, changes, encoding)
             all_bad_tests.extend(bad_tests)
             terminalreporter.write_line(f"    {fname}")
 

diff --git a/pytest_doctestplus/utils.py b/pytest_doctestplus/utils.py
@@ -3,6 +3,7 @@
 from importlib.metadata import distribution
 from packaging.requirements import Requirement
 
+
 class ModuleChecker:
 
     def find_module(self, module):

diff --git a/tests/test_encoding.py b/tests/test_encoding.py
@@ -0,0 +1,146 @@
+import locale
+from pathlib import Path
+from textwrap import dedent
+from typing import Callable, Tuple
+
+import pytest
+
+pytest_plugins = ["pytester"]
+
+
+@pytest.fixture(
+    params=[
+        ("A", "a", "utf-8"),
+        ("☆", "★", "utf-8"),
+        ("b", "B", "cp1252"),
+        ("☁", "☀", "utf-8"),
+    ],
+    ids=[
+        "Aa-utf8",
+        "star-utf8",
+        "bB-cp1252",
+        "cloud-utf8",
+    ],
+)
+def charset(request):
+    return request.param
+
+
+@pytest.fixture()
+def basic_file(tmp_path: Path) -> Callable[[str, str, str], Tuple[Path, str, str]]:
+
+    def makebasicfile(a, b, encoding: str) -> Tuple[str, str, str]:
+        """alternative implementation without the use of `testdir.makepyfile`."""
+
+        content = """
+            def f():
+                '''
+                >>> print('{}')
+                {}
+                '''
+                pass
+            """
+
+        original = dedent(content.format(a, b))
+        expected_result = dedent(content.format(a, a))
+
+        original_file = tmp_path.joinpath("test_basic.py")
+        original_file.write_text(original, encoding=encoding)
+
+        expected_diff = dedent(
+            f"""
+                 >>> print('{a}')
+            -    {b}
+            +    {a}
+            """
+        ).strip("\n")
+
+        return str(original_file), expected_diff, expected_result
+
+    return makebasicfile
+
+
+def test_basic_file_encoding_diff(testdir, capsys, basic_file, charset):
+    """
+    Test the diff from console output is as expected.
+    """
+    a, b, encoding = charset
+
+    file, diff, _ = basic_file(a, b, encoding)
+
+    testdir.inline_run(
+        file, "--doctest-plus-generate-diff", "--text-file-encoding", encoding
+    )
+
+    stdout, _ = capsys.readouterr()
+    assert diff in stdout
+
+
+def test_basic_file_encoding_overwrite(testdir, basic_file, charset):
+    """
+    Test that the file is overwritten with the expected content.
+    """
+
+    a, b, encoding = charset
+
+    file, _, expected = basic_file(a, b, encoding)
+
+    testdir.inline_run(
+        file,
+        "--doctest-plus-generate-diff",
+        "overwrite",
+        "--text-file-encoding",
+        encoding,
+    )
+
+    assert expected in Path(file).read_text(encoding)
+
+
+def test_legacy_diff(testdir, capsys, basic_file, charset):
+    """
+    Legacy tests are expected to fail on Windows when no encoding is provided.
+
+    On Windows the default is cp1252, so "utf-8" cases are expected to fail writing test files.
+    """
+    a, b, _ = charset
+
+    try:
+        file, diff, _ = basic_file(a, b, None)
+    except UnicodeEncodeError:
+        encoding = locale.getpreferredencoding(False)
+        reason = f"could not encode {repr(charset)} with {encoding=}"
+        pytest.xfail(reason=reason)
+
+    testdir.inline_run(
+        file,
+        "--doctest-plus-generate-diff",
+    )
+
+    stdout, _ = capsys.readouterr()
+
+    assert diff in stdout
+
+
+def test_legacy_overwrite(testdir, basic_file, charset):
+    """
+    Legacy tests are expected to fail on Windows when no encoding is provided.
+
+    On Windows the default is cp1252, so "utf-8" cases are expected to fail writing test files.
+    """
+
+    a, b, _encoding = charset
+
+    try:
+        file, _, expected = basic_file(a, b, None)
+    except UnicodeEncodeError:
+        encoding = locale.getpreferredencoding(False)
+        reason = f"could not encode {repr(charset)} with {encoding=}"
+        pytest.xfail(reason=reason)
+
+    testdir.inline_run(
+        file,
+        "--doctest-plus-generate-diff",
+        "overwrite",
+    )
+
+    assert expected in Path(file).read_text(_encoding)