Skip to content
20 changes: 18 additions & 2 deletions Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -1593,6 +1593,7 @@ def __init__(self, path, *loader_details):
self._path_mtime = -1
self._path_cache = set()
self._relaxed_path_cache = set()
self._cache_is_normalized = False

def invalidate_caches(self):
"""Invalidate the directory mtime."""
Expand All @@ -1618,12 +1619,14 @@ def find_spec(self, fullname, target=None):
self._fill_cache()
self._path_mtime = mtime
# tail_module keeps the original casing, for __file__ and friends
if not tail_module.isascii() and not self._cache_is_normalized:
self._normalize_cache()
Comment on lines +1376 to +1377
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the sake of __import__() and importlib.import_module() we will probably need to either normalize cache_module or preserve the original name in the cache. The former sounds expensive so the latter might be the way to go.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regardless, we should probably have some test cases that use one of those functions rather than syntax.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the code to keep the unnormalised name around, we need the unnormalised name to access the filesystem and need the normalised name to compare to tail_module (which will generally be normalised by the compiler).

I agree that there should be more test cases, I started out with just the minimal test from the issue to see if I can get this to work without bootstrapping issues and get some feedback. I'll definitely add more tests when this approach is seen as acceptable by @brettcannon (as the importlib expert).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the cache always keeps the raw name then can we drop _cache_is_normalized?

if _relax_case():
cache = self._relaxed_path_cache
cache_module = tail_module.lower()
cache = self._relaxed_path_cache
else:
cache = self._path_cache
cache_module = tail_module
cache = self._path_cache
# Check if the module is the name of a directory (and thus a package).
if cache_module in cache:
base_path = _path_join(self.path, tail_module)
Expand Down Expand Up @@ -1685,6 +1688,19 @@ def _fill_cache(self):
if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS):
self._relaxed_path_cache = {fn.lower() for fn in contents}

self._cache_is_normalized = False

def _normalize_cache(self):
"""
Normalize all entries in the caches to NFKC.
"""
from unicodedata import normalize

self._path_cache = { normalize('NFKC', p) for p in self._path_cache }
self._relaxed_path_cache = { normalize('NFKC', p) for p in self._relaxed_path_cache }
self._cache_is_normalized = True


@classmethod
def path_hook(cls, *loader_details):
"""A class method which returns a closure to use on sys.path_hook
Expand Down
38 changes: 38 additions & 0 deletions Lib/test/test_import/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import threading
import time
import types
import unicodedata
import unittest
from unittest import mock
import _imp
Expand Down Expand Up @@ -2701,6 +2702,43 @@ def test_pyimport_addmodule_create(self):
mod = _testcapi.check_pyimport_addmodule(name)
self.assertIs(mod, sys.modules[name])

class TestImportAccented(unittest.TestCase):
# XXX: There should be tests with PYTHONCASEOK as well
# (for those platforms where this is relevant)
dir_name = os.path.abspath(TESTFN)

def setUp(self):
self.sys_path = sys.path[:]
os.mkdir(self.dir_name)
sys.path.insert(0, self.dir_name)
importlib.invalidate_caches()

def tearDown(self):
sys.path[:] = self.sys_path
importlib.invalidate_caches()
rmtree(self.dir_name)

def assert_importing_possible(self, name):
filename = os.path.join(self.dir_name, f"{name}.py")
with open(filename, "w") as stream:
stream.write("SPAM = 'spam'\n")

values = {}
exec(f"from {name} import SPAM", values, values)
self.assertEqual(values["SPAM"], "spam")
del sys.modules[unicodedata.normalize('NFKC', name)]

def test_import_precomposed(self):
name = 'M\u00E4dchen'
self.assert_importing_possible(name)

def test_import_normalized(self):
name = 'M\u0061\u0308dchen'
self.assert_importing_possible(name)

def test_import_macos_input(self):
name = 'Mädchen'
self.assert_importing_possible(name)

if __name__ == '__main__':
# Test needs to be a package, so we can do relative imports.
Expand Down