1010class FileManager :
1111 """
1212 Flexible file collector with optional patterns and name based filtering.
13+ Also supports lazy loading (useful for multiprocessing).
1314
1415 Parameters
1516 ----------
@@ -25,6 +26,8 @@ class FileManager:
2526 Drop files whose RELATIVE path contains ANY of these substrings. (Exclude wins.)
2627 recursive: bool
2728 Whether to recursively search subdirectories.
29+ lazy_init : bool
30+ If True, defer file collection until the first access (default: False).
2831 """
2932
3033 def __init__ (
@@ -35,56 +38,154 @@ def __init__(
3538 include_names : list [str ] | None = None ,
3639 exclude_names : list [str ] | None = None ,
3740 recursive : bool = False ,
41+ lazy_init : bool = False ,
3842 ):
3943 self .path = path
4044 self .file_type = file_type
4145 self .pattern = pattern
4246 self .include_names = include_names
4347 self .exclude_names = exclude_names
4448 self .recursive = recursive
45- self .collect_files ()
46- self .filter_files ()
4749
48- def filter_files (self ):
49- if self .include_names is not None :
50- _files_re = [str (_file .relative_to (self .path )) for _file in self .files ]
51- self .files = [
50+ self ._files : list [Path ] | None
51+ if not lazy_init :
52+ self .refresh ()
53+ else :
54+ self ._files = None
55+
56+ def refresh (self ):
57+ """
58+ (Re)collect and filter files immediately.
59+
60+ This method rebuilds the internal file list by scanning the directory and
61+ applying inclusion/exclusion filters.
62+ """
63+ self ._files = self .collect_files (self .path , self .file_type , self .pattern , self .recursive )
64+ self ._files = self .filter_files (
65+ self ._files , self .path , self .include_names , self .exclude_names
66+ )
67+
68+ @property
69+ def files (self ) -> list [Path ]:
70+ """
71+ Lazily returns the collected file list.
72+
73+ If `lazy_init=True` was set and the files have not yet been collected,
74+ this property will automatically trigger a collection.
75+ """
76+ if self ._files is None : # Lazy loading
77+ self .refresh ()
78+ assert self ._files is not None
79+ return self ._files
80+
81+ @files .setter
82+ def files (self , value : list [Path ]):
83+ """Directly override the internal file list (advanced use only)."""
84+ self ._files = value
85+
86+ @staticmethod
87+ def filter_files (
88+ files : list [Path ],
89+ path : Path ,
90+ include_names : list [str ] | None = None ,
91+ exclude_names : list [str ] | None = None ,
92+ ) -> list [Path ]:
93+ """
94+ Filter a list of files based on inclusion or exclusion substrings.
95+
96+ Parameters
97+ ----------
98+ files : list[Path]
99+ Input file list.
100+ path : Path
101+ Root path used to compute relative paths for filtering.
102+ include_names : list[str] | None
103+ Substrings; keep files containing any of these in their relative path.
104+ exclude_names : list[str] | None
105+ Substrings; remove files containing any of these in their relative path.
106+
107+ Returns
108+ -------
109+ list[Path]
110+ Filtered file list.
111+ """
112+ if include_names is not None :
113+ _files_re = [str (_file .relative_to (path )) for _file in files ]
114+ files = [
52115 _file
53- for _file , rel in zip (list (self . files ), _files_re , strict = False )
54- if any (_token in rel for _token in self . include_names )
116+ for _file , rel in zip (list (files ), _files_re , strict = False )
117+ if any (_token in rel for _token in include_names )
55118 ]
56119
57- if self . exclude_names is not None :
58- _files_re = [str (_file .relative_to (self . path )) for _file in self . files ]
59- self . files = [
120+ if exclude_names is not None :
121+ _files_re = [str (_file .relative_to (path )) for _file in files ]
122+ files = [
60123 _file
61- for _file , rel in zip (list (self . files ), _files_re , strict = False )
62- if not any (_token in rel for _token in self . exclude_names )
124+ for _file , rel in zip (list (files ), _files_re , strict = False )
125+ if not any (_token in rel for _token in exclude_names )
63126 ]
127+ return files
64128
65- def collect_files (self ):
66- if self .file_type == "" or self .path == "" :
67- self .files = []
68- return
129+ @staticmethod
130+ def collect_files (
131+ path : Path , file_type : str , pattern : str | None , recursive : bool = False
132+ ) -> list [Path ]:
133+ """
134+ Collect files under the given directory according to a pattern and extension.
135+
136+ Parameters
137+ ----------
138+ path : Path
139+ Root directory to search.
140+ file_type : str
141+ File extension to match (e.g., ".png").
142+ pattern : str | None
143+ Glob-like pattern (e.g., "*_image").
144+ recursive : bool, optional
145+ Whether to recursively search subdirectories.
69146
70- if self .pattern is None :
147+ Returns
148+ -------
149+ list[Path]
150+ Naturally sorted list of file paths.
151+ """
152+ if file_type == "" or path == "" :
153+ return []
154+
155+ if pattern is None :
71156 pattern = "*"
72- elif "*" not in self . pattern :
73- pattern = "*" + self . pattern
157+ elif "*" not in pattern :
158+ pattern = "*" + pattern
74159 else :
75- pattern = self . pattern
160+ pattern = pattern
76161
77- if self . recursive :
78- files = list (Path (self . path ).rglob (pattern + self . file_type ))
162+ if recursive :
163+ files = list (Path (path ).rglob (pattern + file_type ))
79164 else :
80- files = list (Path (self .path ).glob (pattern + self .file_type ))
81- self .files = natsorted (files , key = lambda p : p .name )
165+ files = list (Path (path ).glob (pattern + file_type ))
166+ # self.files = natsorted(files, key=lambda p: p.name)
167+ return natsorted (files , key = lambda p : p .name )
82168
83169 def get_name (self , file : str | int , with_file_type = True ) -> str :
84- """Just keep this for backwards compatibility"""
170+ """Legacy alias for :meth:`name_from_path` (kept for backward compatibility). """
85171 return self .name_from_path (file , with_file_type )
86172
87173 def name_from_path (self , file : str | int , include_ext : bool = True ) -> str :
174+ """
175+ Get the relative name of a file (e.g., 'subdir/sample.png').
176+
177+ Parameters
178+ ----------
179+ file : str | int
180+ File path or index into the internal file list.
181+ include_ext : bool
182+ Whether to keep the file extension.
183+
184+ Returns
185+ -------
186+ str
187+ Relative file name.
188+ """
88189 if isinstance (file , int ):
89190 file = str (self .files [file ])
90191 name = str (Path (file ).relative_to (self .path ))
@@ -93,6 +194,9 @@ def name_from_path(self, file: str | int, include_ext: bool = True) -> str:
93194 return name
94195
95196 def path_from_name (self , name : str | Path , include_ext = True ):
197+ """
198+ Convert a relative name (as from :meth:`name_from_path`) to an absolute path.
199+ """
96200 rel = Path (name )
97201 if include_ext and rel .suffix != self .file_type :
98202 rel = rel .with_suffix (self .file_type )
@@ -107,6 +211,36 @@ def __len__(self):
107211 def __iter__ (self ):
108212 return iter (self .files )
109213
214+ def __getstate__ (self ):
215+ """
216+ Make the object lightweight for pickling.
217+
218+ The file list is omitted to reduce memory footprint when the object is
219+ sent to subprocesses. Workers can rebuild it lazily on first access.
220+ """
221+ return {
222+ "path" : str (self .path ),
223+ "file_type" : self .file_type ,
224+ "pattern" : self .pattern ,
225+ "include_names" : self .include_names ,
226+ "exclude_names" : self .exclude_names ,
227+ "recursive" : self .recursive ,
228+ "_files" : None ,
229+ }
230+
231+ def __setstate__ (self , state ):
232+ """
233+ Restore object state after unpickling (used in multiprocessing).
234+ The file list will be lazily rebuilt on first access.
235+ """
236+ self .path = Path (state ["path" ])
237+ self .file_type = state ["file_type" ]
238+ self .pattern = state ["pattern" ]
239+ self .include_names = state ["include_names" ]
240+ self .exclude_names = state ["exclude_names" ]
241+ self .recursive = state ["recursive" ]
242+ self ._files = state .get ("_files" , None )
243+
110244
111245class FileManagerStacked (FileManager ):
112246 """
0 commit comments