Skip to content

Commit 60ba61f

Browse files
haampie scheibelp
authored and committed
Revert "llnl.util.filesystem.find: multiple entrypoints (spack#47436)"
This reverts commit 73219e4.
1 parent 0a4563f commit 60ba61f

File tree

4 files changed

+130
-178
lines changed

4 files changed

+130
-178
lines changed

lib/spack/llnl/util/filesystem.py

Lines changed: 92 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
import tempfile
2121
from contextlib import contextmanager
2222
from itertools import accumulate
23-
from typing import Callable, Deque, Dict, Iterable, List, Match, Optional, Set, Tuple, Union
23+
from typing import Callable, Iterable, List, Match, Optional, Tuple, Union
2424

2525
import llnl.util.symlink
2626
from llnl.util import tty
27-
from llnl.util.lang import dedupe, fnmatch_translate_multiple, memoized
27+
from llnl.util.lang import dedupe, memoized
2828
from llnl.util.symlink import islink, readlink, resolve_link_target_relative_to_the_link, symlink
2929

3030
from ..path import path_to_os_path, system_path_filter
@@ -1673,40 +1673,32 @@ def find_first(root: str, files: Union[Iterable[str], str], bfs_depth: int = 2)
16731673
return FindFirstFile(root, *files, bfs_depth=bfs_depth).find()
16741674

16751675

1676-
def find(
1677-
root: Union[str, List[str]],
1678-
files: Union[str, List[str]],
1679-
recursive: bool = True,
1680-
max_depth: Optional[int] = None,
1681-
) -> List[str]:
1682-
"""Finds all non-directory files matching the filename patterns from ``files`` starting from
1683-
``root``. This function returns a deterministic result for the same input and directory
1684-
structure when run multiple times. Symlinked directories are followed, and unique directories
1685-
are searched only once. Each matching file is returned only once at lowest depth in case
1686-
multiple paths exist due to symlinked directories. The function has similarities to the Unix
1687-
``find`` utility.
1676+
def find(root, files, recursive=True, max_depth: Optional[int] = None):
1677+
"""Search for ``files`` starting from the ``root`` directory.
1678+
1679+
Like GNU/BSD find but written entirely in Python.
1680+
1681+
Specifically this behaves like `find -type f`: it only returns
1682+
results that are files. When searching recursively, this behaves
1683+
as `find` with the `-L` option (follows symlinks).
16881684
16891685
Examples:
16901686
16911687
.. code-block:: console
16921688
1693-
$ find -L /usr -name python3 -type f
1689+
$ find -L /usr -name python
16941690
1695-
is roughly equivalent to
1696-
1697-
>>> find("/usr", "python3")
1691+
is equivalent to:
16981692
1699-
with the notable difference that this function only lists a single path to each file in case of
1700-
symlinked directories.
1693+
>>> find('/usr', 'python')
17011694
17021695
.. code-block:: console
17031696
1704-
$ find -L /usr/local/bin /usr/local/sbin -maxdepth 1 '(' -name python3 -o -name getcap \\
1705-
')' -type f
1697+
$ find /usr/local/bin -maxdepth 1 -name python
17061698
1707-
is roughly equivalent to:
1699+
is equivalent to:
17081700
1709-
>>> find(["/usr/local/bin", "/usr/local/sbin"], ["python3", "getcap"], recursive=False)
1701+
>>> find('/usr/local/bin', 'python', recursive=False)
17101702
17111703
Accepts any glob characters accepted by fnmatch:
17121704
@@ -1720,17 +1712,17 @@ def find(
17201712
========== ====================================
17211713
17221714
Parameters:
1723-
root: One or more root directories to start searching from
1724-
files: One or more filename patterns to search for
1725-
recursive: if False search only root, if True descends from roots. Defaults to True.
1726-
max_depth: if set, don't search below this depth. Cannot be set if recursive is False
1715+
root (str): The root directory to start searching from
1716+
files (str or collections.abc.Sequence): File name pattern(s) to search for
1717+
recursive (bool): if False search only root folder,
1718+
if True descends top-down from the root. Defaults to True.
1719+
max_depth (int): if set, don't search below this depth. Must be None
1720+
or 0 if recursive is False
17271721
1728-
Returns a list of absolute, matching file paths.
1722+
Returns:
1723+
list: The files that have been found
17291724
"""
1730-
if not isinstance(root, list):
1731-
root = [root]
1732-
1733-
if not isinstance(files, list):
1725+
if isinstance(files, str):
17341726
files = [files]
17351727

17361728
# If recursive is false, max_depth can only be None or 0
@@ -1742,9 +1734,10 @@ def find(
17421734
elif max_depth is None:
17431735
max_depth = sys.maxsize
17441736

1745-
tty.debug(f"Find (max depth = {max_depth}): {root} {files}")
1746-
result = _find_max_depth(root, files, max_depth)
1747-
tty.debug(f"Find complete: {root} {files}")
1737+
tty.debug(f"Find (max depth = {max_depth}): {root} {str(files)}")
1738+
result = find_max_depth(root, files, max_depth)
1739+
1740+
tty.debug(f"Find complete: {root} {str(files)}")
17481741
return result
17491742

17501743

@@ -1753,36 +1746,56 @@ def _log_file_access_issue(e: OSError, path: str) -> None:
17531746
tty.debug(f"find must skip {path}: {errno_name} {e}")
17541747

17551748

1756-
def _dir_id(s: os.stat_result) -> Tuple[int, int]:
1757-
# Note: on windows, st_ino is the file index and st_dev is the volume serial number. See
1758-
# https://github.com/python/cpython/blob/3.9/Python/fileutils.c
1759-
return (s.st_ino, s.st_dev)
1749+
@system_path_filter(arg_slice=slice(1))
1750+
def find_max_depth(root, globs, max_depth: Optional[int] = None):
1751+
"""Given a set of non-recursive glob file patterns, finds all
1752+
files matching those patterns up to a maximum specified depth.
17601753
1754+
If a directory has a name which matches an input pattern, it will
1755+
not be included in the results.
17611756
1762-
def _find_max_depth(roots: List[str], globs: List[str], max_depth: int = sys.maxsize) -> List[str]:
1763-
"""See ``find`` for the public API."""
1764-
# Apply normcase to file patterns and filenames to respect case insensitive filesystems
1765-
regex, groups = fnmatch_translate_multiple([os.path.normcase(x) for x in globs])
1766-
# Ordered dictionary that keeps track of the files found for each pattern
1767-
capture_group_to_paths: Dict[str, List[str]] = {group: [] for group in groups}
1768-
# Ensure returned paths are always absolute
1769-
roots = [os.path.abspath(r) for r in roots]
1770-
# Breadth-first search queue. Each element is a tuple of (depth, directory)
1771-
dir_queue: Deque[Tuple[int, str]] = collections.deque()
1772-
# Set of visited directories. Each element is a tuple of (inode, device)
1773-
visited_dirs: Set[Tuple[int, int]] = set()
1757+
If ``max_depth`` is specified, does not search below that depth.
17741758
1775-
for root in roots:
1776-
try:
1777-
stat_root = os.stat(root)
1778-
except OSError as e:
1779-
_log_file_access_issue(e, root)
1780-
continue
1781-
dir_id = _dir_id(stat_root)
1782-
if dir_id not in visited_dirs:
1783-
dir_queue.appendleft((0, root))
1784-
visited_dirs.add(dir_id)
1759+
If ``globs`` is a list, files matching earlier entries are placed
1760+
in the return value before files matching later entries.
1761+
"""
1762+
try:
1763+
stat_root = os.stat(root)
1764+
except OSError:
1765+
return []
1766+
1767+
if max_depth is None:
1768+
max_depth = sys.maxsize
17851769

1770+
if isinstance(globs, str):
1771+
globs = [globs]
1772+
# Apply normcase to regular expressions and to the filenames:
1773+
# this respects case-sensitivity semantics of different OSes
1774+
# (e.g. file search is typically case-insensitive on Windows)
1775+
regexes = [re.compile(fnmatch.translate(os.path.normcase(x))) for x in globs]
1776+
1777+
# Note later calls to os.scandir etc. return abspaths if the
1778+
# input is absolute, see https://docs.python.org/3/library/os.html#os.DirEntry.path
1779+
root = os.path.abspath(root)
1780+
1781+
found_files = collections.defaultdict(list)
1782+
1783+
def _dir_id(stat_info):
1784+
# Note: on windows, st_ino is the file index and st_dev
1785+
# is the volume serial number. See
1786+
# https://github.com/python/cpython/blob/3.9/Python/fileutils.c
1787+
return (stat_info.st_ino, stat_info.st_dev)
1788+
1789+
visited_dirs = set([_dir_id(stat_root)])
1790+
1791+
# Each queue item stores the depth and path
1792+
# This achieves a consistent traversal order by iterating through
1793+
# each directory in alphabetical order.
1794+
# This also traverses in BFS order to ensure finding the shortest
1795+
# path to any file (or one of the shortest paths, if there are
1796+
# several - the one returned will be consistent given the prior
1797+
# point).
1798+
dir_queue = collections.deque([(0, root)])
17861799
while dir_queue:
17871800
depth, next_dir = dir_queue.pop()
17881801
try:
@@ -1797,18 +1810,20 @@ def _find_max_depth(roots: List[str], globs: List[str], max_depth: int = sys.max
17971810
try:
17981811
it_is_a_dir = dir_entry.is_dir(follow_symlinks=True)
17991812
except OSError as e:
1800-
# Possible permission issue, or a symlink that cannot be resolved (ELOOP).
1813+
# Possible permission issue, or a symlink that cannot
1814+
# be resolved (ELOOP).
18011815
_log_file_access_issue(e, dir_entry.path)
18021816
continue
18031817

1804-
if it_is_a_dir and depth < max_depth:
1818+
if it_is_a_dir and (depth < max_depth):
18051819
try:
1806-
# The stat should be performed in a try/except block. We repeat that here
1807-
# vs. moving to the above block because we only want to call `stat` if we
1808-
# haven't exceeded our max_depth
1820+
# The stat should be performed in a try/except block.
1821+
# We repeat that here vs. moving to the above block
1822+
# because we only want to call `stat` if we haven't
1823+
# exceeded our max_depth
18091824
if sys.platform == "win32":
1810-
# Note: st_ino/st_dev on DirEntry.stat are not set on Windows, so we
1811-
# have to call os.stat
1825+
# Note: st_ino/st_dev on DirEntry.stat are not set on
1826+
# Windows, so we have to call os.stat
18121827
stat_info = os.stat(dir_entry.path, follow_symlinks=True)
18131828
else:
18141829
stat_info = dir_entry.stat(follow_symlinks=True)
@@ -1821,15 +1836,15 @@ def _find_max_depth(roots: List[str], globs: List[str], max_depth: int = sys.max
18211836
dir_queue.appendleft((depth + 1, dir_entry.path))
18221837
visited_dirs.add(dir_id)
18231838
else:
1824-
m = regex.match(os.path.normcase(os.path.basename(dir_entry.path)))
1825-
if not m:
1826-
continue
1827-
for group in capture_group_to_paths:
1828-
if m.group(group):
1829-
capture_group_to_paths[group].append(dir_entry.path)
1830-
break
1839+
fname = os.path.basename(dir_entry.path)
1840+
for pattern in regexes:
1841+
if pattern.match(os.path.normcase(fname)):
1842+
found_files[pattern].append(os.path.join(next_dir, fname))
1843+
1844+
# TODO: for fully-recursive searches, we can print a warning after
1845+
# having searched everything up to some fixed depth
18311846

1832-
return [path for paths in capture_group_to_paths.values() for path in paths]
1847+
return list(itertools.chain(*[found_files[x] for x in regexes]))
18331848

18341849

18351850
# Utilities for libraries and headers

lib/spack/llnl/util/lang.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55

66
import collections.abc
77
import contextlib
8-
import fnmatch
98
import functools
109
import itertools
1110
import os
1211
import re
1312
import sys
1413
import traceback
15-
import typing
1614
import warnings
1715
from datetime import datetime, timedelta
1816
from typing import Callable, Iterable, List, Tuple, TypeVar
@@ -861,32 +859,6 @@ def elide_list(line_list: List[str], max_num: int = 10) -> List[str]:
861859
return line_list
862860

863861

864-
if sys.version_info >= (3, 9):
865-
PatternStr = re.Pattern[str]
866-
else:
867-
PatternStr = typing.Pattern[str]
868-
869-
870-
def fnmatch_translate_multiple(patterns: List[str]) -> Tuple[PatternStr, List[str]]:
871-
"""Same as fnmatch.translate, but creates a single regex of the form
872-
``(?P<pattern0>...)|(?P<pattern1>...)|...`` for each pattern in the iterable, where
873-
``patternN`` is a named capture group that matches the corresponding pattern translated by
874-
``fnmatch.translate``. This can be used to match multiple patterns in a single pass. No case
875-
normalization is performed on the patterns.
876-
877-
Args:
878-
patterns: list of fnmatch patterns
879-
880-
Returns:
881-
Tuple of the combined regex and the list of named capture groups corresponding to each
882-
pattern in the input list.
883-
"""
884-
groups = [f"pattern{i}" for i in range(len(patterns))]
885-
regexes = (fnmatch.translate(p) for p in patterns)
886-
combined = re.compile("|".join(f"(?P<{g}>{r})" for g, r in zip(groups, regexes)))
887-
return combined, groups
888-
889-
890862
@contextlib.contextmanager
891863
def nullcontext(*args, **kwargs):
892864
"""Empty context manager.

0 commit comments

Comments
 (0)