2020import tempfile
2121from contextlib import contextmanager
2222from itertools import accumulate
23- from typing import Callable , Deque , Dict , Iterable , List , Match , Optional , Set , Tuple , Union
23+ from typing import Callable , Iterable , List , Match , Optional , Tuple , Union
2424
2525import llnl .util .symlink
2626from llnl .util import tty
27- from llnl .util .lang import dedupe , fnmatch_translate_multiple , memoized
27+ from llnl .util .lang import dedupe , memoized
2828from llnl .util .symlink import islink , readlink , resolve_link_target_relative_to_the_link , symlink
2929
3030from ..path import path_to_os_path , system_path_filter
@@ -1673,40 +1673,32 @@ def find_first(root: str, files: Union[Iterable[str], str], bfs_depth: int = 2)
16731673 return FindFirstFile (root , * files , bfs_depth = bfs_depth ).find ()
16741674
16751675
1676- def find (
1677- root : Union [str , List [str ]],
1678- files : Union [str , List [str ]],
1679- recursive : bool = True ,
1680- max_depth : Optional [int ] = None ,
1681- ) -> List [str ]:
1682- """Finds all non-directory files matching the filename patterns from ``files`` starting from
1683- ``root``. This function returns a deterministic result for the same input and directory
1684- structure when run multiple times. Symlinked directories are followed, and unique directories
1685- are searched only once. Each matching file is returned only once at lowest depth in case
1686- multiple paths exist due to symlinked directories. The function has similarities to the Unix
1687- ``find`` utility.
1676+ def find (root , files , recursive = True , max_depth : Optional [int ] = None ):
1677+ """Search for ``files`` starting from the ``root`` directory.
1678+
1679+ Like GNU/BSD find but written entirely in Python.
1680+
1681+ Specifically this behaves like `find -type f`: it only returns
1682+ results that are files. When searching recursively, this behaves
1683+ as `find` with the `-L` option (follows symlinks).
16881684
16891685 Examples:
16901686
16911687 .. code-block:: console
16921688
1693- $ find -L /usr -name python3 -type f
1689+ $ find -L /usr -name python
16941690
1695- is roughly equivalent to
1696-
1697- >>> find("/usr", "python3")
1691+ is equivalent to:
16981692
1699- with the notable difference that this function only lists a single path to each file in case of
1700- symlinked directories.
1693+ >>> find('/usr', 'python')
17011694
17021695 .. code-block:: console
17031696
1704- $ find -L /usr/local/bin /usr/local/sbin -maxdepth 1 '(' -name python3 -o -name getcap \\
1705- ')' -type f
1697+ $ find /usr/local/bin -maxdepth 1 -name python
17061698
1707- is roughly equivalent to:
1699+ is equivalent to:
17081700
1709- >>> find([" /usr/local/bin", "/usr/local/sbin"], ["python3", "getcap"] , recursive=False)
1701+ >>> find(' /usr/local/bin', 'python' , recursive=False)
17101702
17111703 Accepts any glob characters accepted by fnmatch:
17121704
@@ -1720,17 +1712,17 @@ def find(
17201712 ========== ====================================
17211713
17221714 Parameters:
1723- root: One or more root directories to start searching from
1724- files: One or more filename patterns to search for
1725- recursive: if False search only root, if True descends from roots. Defaults to True.
1726- max_depth: if set, don't search below this depth. Cannot be set if recursive is False
1715+ root (str): The root directory to start searching from
1716+ files (str or collections.abc.Sequence): Filename pattern(s) to search for
1717+ recursive (bool): if False search only root folder,
1718+ if True descends top-down from the root. Defaults to True.
1719+ max_depth (int): if set, don't search below this depth. Cannot be set
1720+ if recursive is False
17271721
1728- Returns a list of absolute, matching file paths.
1722+ Returns:
1723+ list: The files that have been found
17291724 """
1730- if not isinstance (root , list ):
1731- root = [root ]
1732-
1733- if not isinstance (files , list ):
1725+ if isinstance (files , str ):
17341726 files = [files ]
17351727
17361728 # If recursive is false, max_depth can only be None or 0
@@ -1742,9 +1734,10 @@ def find(
17421734 elif max_depth is None :
17431735 max_depth = sys .maxsize
17441736
1745- tty .debug (f"Find (max depth = { max_depth } ): { root } { files } " )
1746- result = _find_max_depth (root , files , max_depth )
1747- tty .debug (f"Find complete: { root } { files } " )
1737+ tty .debug (f"Find (max depth = { max_depth } ): { root } { str (files )} " )
1738+ result = find_max_depth (root , files , max_depth )
1739+
1740+ tty .debug (f"Find complete: { root } { str (files )} " )
17481741 return result
17491742
17501743
@@ -1753,36 +1746,56 @@ def _log_file_access_issue(e: OSError, path: str) -> None:
17531746 tty .debug (f"find must skip { path } : { errno_name } { e } " )
17541747
17551748
1756- def _dir_id ( s : os . stat_result ) -> Tuple [ int , int ]:
1757- # Note: on windows, st_ino is the file index and st_dev is the volume serial number. See
1758- # https://github.com/python/cpython/blob/3.9/Python/fileutils.c
1759- return ( s . st_ino , s . st_dev )
1749+ @ system_path_filter ( arg_slice = slice ( 1 ))
1750+ def find_max_depth ( root , globs , max_depth : Optional [ int ] = None ):
1751+ """Given a set of non-recursive glob file patterns, finds all
1752+ files matching those patterns up to a maximum specified depth.
17601753
1754+ If a directory has a name which matches an input pattern, it will
1755+ not be included in the results.
17611756
1762- def _find_max_depth (roots : List [str ], globs : List [str ], max_depth : int = sys .maxsize ) -> List [str ]:
1763- """See ``find`` for the public API."""
1764- # Apply normcase to file patterns and filenames to respect case insensitive filesystems
1765- regex , groups = fnmatch_translate_multiple ([os .path .normcase (x ) for x in globs ])
1766- # Ordered dictionary that keeps track of the files found for each pattern
1767- capture_group_to_paths : Dict [str , List [str ]] = {group : [] for group in groups }
1768- # Ensure returned paths are always absolute
1769- roots = [os .path .abspath (r ) for r in roots ]
1770- # Breadth-first search queue. Each element is a tuple of (depth, directory)
1771- dir_queue : Deque [Tuple [int , str ]] = collections .deque ()
1772- # Set of visited directories. Each element is a tuple of (inode, device)
1773- visited_dirs : Set [Tuple [int , int ]] = set ()
1757+ If ``max_depth`` is specified, does not search below that depth.
17741758
1775- for root in roots :
1776- try :
1777- stat_root = os . stat ( root )
1778- except OSError as e :
1779- _log_file_access_issue ( e , root )
1780- continue
1781- dir_id = _dir_id ( stat_root )
1782- if dir_id not in visited_dirs :
1783- dir_queue . appendleft (( 0 , root ))
1784- visited_dirs . add ( dir_id )
1759+ If ``globs`` is a list, files matching earlier entries are placed
1760+ in the return value before files matching later entries.
1761+ """
1762+ try :
1763+ stat_root = os . stat ( root )
1764+ except OSError :
1765+ return []
1766+
1767+ if max_depth is None :
1768+ max_depth = sys . maxsize
17851769
1770+ if isinstance (globs , str ):
1771+ globs = [globs ]
1772+ # Apply normcase to regular expressions and to the filenames:
1773+ # this respects case-sensitivity semantics of different OSes
1774+ # (e.g. file search is typically case-insensitive on Windows)
1775+ regexes = [re .compile (fnmatch .translate (os .path .normcase (x ))) for x in globs ]
1776+
1777+ # Note later calls to os.scandir etc. return abspaths if the
1778+ # input is absolute, see https://docs.python.org/3/library/os.html#os.DirEntry.path
1779+ root = os .path .abspath (root )
1780+
1781+ found_files = collections .defaultdict (list )
1782+
1783+ def _dir_id (stat_info ):
1784+ # Note: on windows, st_ino is the file index and st_dev
1785+ # is the volume serial number. See
1786+ # https://github.com/python/cpython/blob/3.9/Python/fileutils.c
1787+ return (stat_info .st_ino , stat_info .st_dev )
1788+
1789+ visited_dirs = set ([_dir_id (stat_root )])
1790+
1791+ # Each queue item stores the depth and path
1792+ # This achieves a consistent traversal order by iterating through
1793+ # each directory in alphabetical order.
1794+ # This also traverses in BFS order to ensure finding the shortest
1795+ # path to any file (or one of the shortest paths, if there are
1796+ # several - the one returned will be consistent given the prior
1797+ # point).
1798+ dir_queue = collections .deque ([(0 , root )])
17861799 while dir_queue :
17871800 depth , next_dir = dir_queue .pop ()
17881801 try :
@@ -1797,18 +1810,20 @@ def _find_max_depth(roots: List[str], globs: List[str], max_depth: int = sys.max
17971810 try :
17981811 it_is_a_dir = dir_entry .is_dir (follow_symlinks = True )
17991812 except OSError as e :
1800- # Possible permission issue, or a symlink that cannot be resolved (ELOOP).
1813+ # Possible permission issue, or a symlink that cannot
1814+ # be resolved (ELOOP).
18011815 _log_file_access_issue (e , dir_entry .path )
18021816 continue
18031817
1804- if it_is_a_dir and depth < max_depth :
1818+ if it_is_a_dir and ( depth < max_depth ) :
18051819 try :
1806- # The stat should be performed in a try/except block. We repeat that here
1807- # vs. moving to the above block because we only want to call `stat` if we
1808- # haven't exceeded our max_depth
1820+ # The stat should be performed in a try/except block.
1821+ # We repeat that here vs. moving to the above block
1822+ # because we only want to call `stat` if we haven't
1823+ # exceeded our max_depth
18091824 if sys .platform == "win32" :
1810- # Note: st_ino/st_dev on DirEntry.stat are not set on Windows, so we
1811- # have to call os.stat
1825+ # Note: st_ino/st_dev on DirEntry.stat are not set on
1826+ # Windows, so we have to call os.stat
18121827 stat_info = os .stat (dir_entry .path , follow_symlinks = True )
18131828 else :
18141829 stat_info = dir_entry .stat (follow_symlinks = True )
@@ -1821,15 +1836,15 @@ def _find_max_depth(roots: List[str], globs: List[str], max_depth: int = sys.max
18211836 dir_queue .appendleft ((depth + 1 , dir_entry .path ))
18221837 visited_dirs .add (dir_id )
18231838 else :
1824- m = regex . match ( os .path .normcase ( os . path . basename (dir_entry .path )) )
1825- if not m :
1826- continue
1827- for group in capture_group_to_paths :
1828- if m . group ( group ):
1829- capture_group_to_paths [ group ]. append ( dir_entry . path )
1830- break
1839+ fname = os .path .basename (dir_entry .path )
1840+ for pattern in regexes :
1841+ if pattern . match ( os . path . normcase ( fname )):
1842+ found_files [ pattern ]. append ( os . path . join ( next_dir , fname ))
1843+
1844+ # TODO: for fully-recursive searches, we can print a warning after
1845+ # having searched everything up to some fixed depth
18311846
1832- return [ path for paths in capture_group_to_paths . values () for path in paths ]
1847+ return list ( itertools . chain ( * [ found_files [ x ] for x in regexes ]))
18331848
18341849
18351850# Utilities for libraries and headers
0 commit comments