|
11 | 11 | from concurrent.futures import wait |
12 | 12 | from threading import Lock |
13 | 13 | from typing import Dict |
14 | | -from typing import List |
15 | 14 | from typing import Optional |
16 | 15 | from typing import Tuple |
17 | 16 |
|
@@ -108,6 +107,26 @@ def get_executor(self): |
108 | 107 | ) |
109 | 108 | return FileSystemTable._executor |
110 | 109 |
|
| 110 | + def get_list_of_blob_names(self, prefix: str, predicates=None): |
| 111 | + """ |
| 112 | + List the paths of all entries found recursively under the prefix. |
| 113 | +
|
| 114 | + Args: |
| 115 | + prefix: Directory/path prefix to list files from |
| 116 | + predicates: Optional predicates (not used for file listing) |
| 117 | +
|
| 118 | + Returns: |
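| 119 | + List of paths for every entry the selector returns (no is_file or extension filter is applied, so directories may be included) |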
| 120 | + """ |
| 121 | + from pyarrow.fs import FileSelector |
| 122 | + |
| 123 | + # Create file selector to list files recursively |
| 124 | + selector = FileSelector(prefix, recursive=True) |
| 125 | + file_infos = self.filesystem.get_file_info(selector) |
| 126 | + |
| 127 | + # Extract paths from FileInfo objects |
| 128 | + return [info.path for info in file_infos] |
| 129 | + |
111 | 130 | def read_blob( |
112 | 131 | self, *, blob_name: str, decoder, just_schema=False, projection=None, selection=None |
113 | 132 | ): |
@@ -185,32 +204,6 @@ def blocking_read(): |
185 | 204 | telemetry.bytes_read += len(data) |
186 | 205 | return ref |
187 | 206 |
|
188 | | - def get_list_of_blob_names(self, *, prefix: str, predicates: list = []) -> List[str]: |
189 | | - """ |
190 | | - List all blobs matching the prefix. |
191 | | -
|
192 | | - Args: |
193 | | - prefix: Path prefix to search |
194 | | - predicates: Optional predicates for filtering (subclasses may use this) |
195 | | -
|
196 | | - Returns: |
197 | | - List of blob paths |
198 | | - """ |
199 | | - from pyarrow.fs import FileSelector |
200 | | - |
201 | | - # Use filesystem's file listing |
202 | | - selector = FileSelector(prefix, recursive=True) |
203 | | - file_infos = self.filesystem.get_file_info(selector) |
204 | | - |
205 | | - # Filter for valid file extensions |
206 | | - blob_names = [ |
207 | | - info.path |
208 | | - for info in file_infos |
209 | | - if info.is_file and info.path.endswith(TUPLE_OF_VALID_EXTENSIONS) |
210 | | - ] |
211 | | - |
212 | | - return blob_names |
213 | | - |
214 | 207 | def read_dataset( |
215 | 208 | self, |
216 | 209 | columns: list = None, |
|
0 commit comments