From 1e90580c01216d71beb9581d33ee61deecbb67b8 Mon Sep 17 00:00:00 2001 From: Kevin Gao Date: Tue, 6 Jun 2023 16:21:00 -0700 Subject: [PATCH 1/2] Make src_paths behave as expected when using --resolve-all-configs When using `--resolve-all-configs`, there is unexpected behavior in that `src_paths` ends up resolving relative to the project root, which defaults to the current working directory. This results in first-party modules being marked as third-party modules in the default case. Under the previous implementation, one possible workaround would be to specify the relative path to config directory (e.g. `relative/path/to/configdir/src`). However, assuming that the most common use of `--resolve-all-configs` is to support multiple sub-projects in the same repository/overall directory, this workaround would now require each sub-project to understand where it lives in the filesystem. This change proposes a fix that sets `directory` on the `config_data` to be the directory containing the used configuration file if not already set. Downstream, this directory is then used to resolve the absolute paths specified by `src_paths`. Fixes #2045 --- docs/configuration/options.md | 2 +- isort/settings.py | 2 ++ tests/unit/test_settings.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/configuration/options.md b/docs/configuration/options.md index c9065a6a..946aa5ad 100644 --- a/docs/configuration/options.md +++ b/docs/configuration/options.md @@ -1758,7 +1758,7 @@ Explicitly set the config root for resolving all configs. When used with the --r ## Resolve All Configs -Tells isort to resolve the configs for all sub-directories and sort files in terms of its closest config files. +Tells isort to resolve the configs for all sub-directories and sort files in terms of its closest config files. When using this option, `src_paths` will resolve relative to the directory that contains the config that was used by default. **Type:** Bool **Default:** `False` diff --git a/isort/settings.py b/isort/settings.py index 2c6b94bc..4b32454c 100644 --- a/isort/settings.py +++ b/isort/settings.py @@ -820,6 +820,8 @@ def find_all_configs(path: str) -> Trie: config_data = {} if config_data: + if "directory" not in config_data: + config_data["directory"] = dirpath trie_root.insert(potential_config_file, config_data) break diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index 295b30c1..02eddd09 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -241,6 +241,7 @@ def test_find_all_configs(tmpdir): pyproject_toml = """ [tool.isort] profile = "hug" +src_paths = ["src"] """ isort_cfg = """ @@ -280,14 +281,18 @@ def test_find_all_configs(tmpdir): config_info_1 = config_trie.search(str(dir1 / "test1.py")) assert config_info_1[0] == str(setup_cfg_file) assert config_info_1[0] == str(setup_cfg_file) and config_info_1[1]["profile"] == "django" + assert set(Config(**config_info_1[1]).src_paths) == {Path(dir1), Path(dir1, "src")} config_info_2 = config_trie.search(str(dir2 / "test2.py")) assert config_info_2[0] == str(pyproject_toml_file) assert config_info_2[0] == str(pyproject_toml_file) and config_info_2[1]["profile"] == "hug" + assert set(Config(**config_info_2[1]).src_paths) == {Path(dir2, "src")} config_info_3 = config_trie.search(str(dir3 / "test3.py")) assert config_info_3[0] == str(isort_cfg_file) assert config_info_3[0] == str(isort_cfg_file) and config_info_3[1]["profile"] == "black" + assert set(Config(**config_info_3[1]).src_paths) == {Path(dir3), Path(dir3, "src")} config_info_4 = config_trie.search(str(tmpdir / "file4.py")) assert config_info_4[0] == "default" + assert set(Config(**config_info_4[1]).src_paths) == {Path.cwd(), Path.cwd().joinpath("src")} From b5f83ab989ca3bc7d306a4b409023254069a3ad3 Mon Sep 17 00:00:00 2001 From: Kevin Gao Date: Wed, 7 Jun 2023 10:27:40 -0700 Subject: [PATCH 2/2] Greatly improve performance of find_all_configs Avoid recursing into default skip files like .venv. Also avoid doing an iteration per config file type. Lastly, use scandir to improve file system walk performance. The previous implementation would walk the entire tree, and iterate over each config source type to test if the file exists. Instead, walk over existing files and do a fast filter to remove candidates. --- isort/settings.py | 52 ++++++++++++++++++++++++------------- tests/unit/test_settings.py | 9 +++++++ 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/isort/settings.py b/isort/settings.py index 4b32454c..d229b49c 100644 --- a/isort/settings.py +++ b/isort/settings.py @@ -19,6 +19,7 @@ Dict, FrozenSet, Iterable, + Iterator, List, Optional, Pattern, @@ -806,28 +807,43 @@ def find_all_configs(path: str) -> Trie: """ trie_root = Trie("default", {}) - for dirpath, _, _ in os.walk(path): - for config_file_name in CONFIG_SOURCES: - potential_config_file = os.path.join(dirpath, config_file_name) - if os.path.isfile(potential_config_file): - config_data: Dict[str, Any] - try: - config_data = _get_config_data( - potential_config_file, CONFIG_SECTIONS[config_file_name] - ) - except Exception: - warn(f"Failed to pull configuration information from {potential_config_file}") - config_data = {} - - if config_data: - if "directory" not in config_data: - config_data["directory"] = dirpath - trie_root.insert(potential_config_file, config_data) - break + config_sources_set = set(CONFIG_SOURCES) + for potential_config_file in _scanwalk_files( + path, exclude_fn=lambda entry: entry.name in DEFAULT_SKIP + ): + if potential_config_file.name not in config_sources_set: + continue + try: + config_data = _get_config_data( + potential_config_file.path, CONFIG_SECTIONS[potential_config_file.name] + ) + if "directory" not in config_data: + config_data["directory"] = os.path.dirname(potential_config_file.path) + trie_root.insert(potential_config_file.path, config_data) + except Exception: + warn(f"Failed to pull configuration information from {potential_config_file.path}") return trie_root +def _scanwalk_files( + root_path: str, exclude_fn: Callable[[os.DirEntry], bool] = None +) -> Iterator[os.DirEntry]: + # depth-first walk of file system starting with root_path + stack: List[str] = [root_path] + while stack: + dir_path = stack.pop() + with os.scandir(dir_path) as it: + for entry in it: + # Avoid processing excluded files/dirs and their descendants + if exclude_fn and exclude_fn(entry): + continue + if entry.is_dir(): + stack.append(entry.path) + elif entry.is_file(): + yield entry + + def _get_config_data(file_path: str, sections: Tuple[str, ...]) -> Dict[str, Any]: settings: Dict[str, Any] = {} diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index 02eddd09..9350f126 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -258,11 +258,13 @@ def test_find_all_configs(tmpdir): dir2 = tmpdir / "subdir2" dir3 = tmpdir / "subdir3" dir4 = tmpdir / "subdir4" + dir_skip = tmpdir / ".venv" dir1.mkdir() dir2.mkdir() dir3.mkdir() dir4.mkdir() + dir_skip.mkdir() setup_cfg_file = dir1 / "setup.cfg" setup_cfg_file.write_text(setup_cfg, "utf-8") @@ -276,6 +278,9 @@ def test_find_all_configs(tmpdir): pyproject_toml_file_broken = dir4 / "pyproject.toml" pyproject_toml_file_broken.write_text(pyproject_toml_broken, "utf-8") + pyproject_toml_file_skip = dir_skip / "pyproject.toml" + pyproject_toml_file_skip.write_text(pyproject_toml, "utf-8") + config_trie = settings.find_all_configs(str(tmpdir)) config_info_1 = config_trie.search(str(dir1 / "test1.py")) @@ -296,3 +301,7 @@ def test_find_all_configs(tmpdir): config_info_4 = config_trie.search(str(tmpdir / "file4.py")) assert config_info_4[0] == "default" assert set(Config(**config_info_4[1]).src_paths) == {Path.cwd(), Path.cwd().joinpath("src")} + + config_info_skip = config_trie.search(str(dir_skip / "skip.py")) + assert config_info_skip[0] == "default" + assert set(Config(**config_info_skip[1]).src_paths) == {Path.cwd(), Path.cwd().joinpath("src")}