77from typing import TYPE_CHECKING
88
99from onetl .file import FileDFReader , FileDFWriter , FileDownloader , FileUploader
10+ from onetl .file .filter import FileSizeRange , Glob , Regexp
1011
1112from syncmaster .worker .handlers .file .base import FileHandler
1213
@@ -23,6 +24,7 @@ def read(self) -> DataFrame:
2324 connection = self .connection ,
2425 source_path = self .transfer_dto .directory_path ,
2526 local_path = self .temp_dir .name ,
27+ filters = self ._get_file_metadata_filters (),
2628 )
2729 downloader .run ()
2830
@@ -65,3 +67,28 @@ def write(self, df: DataFrame) -> None:
6567 options = self .transfer_dto .options ,
6668 )
6769 uploader .run ()
70+
def _make_file_metadata_filters(self, filters: list[dict]) -> list[Glob | Regexp | FileSizeRange]:
    """Convert raw filter descriptions into file filter objects.

    Each item of ``filters`` must be a mapping with ``"type"`` and ``"value"``
    keys. The recognised types are translated as follows:

    * ``"name_glob"``     -> ``Glob(value)``
    * ``"name_regexp"``   -> ``Regexp(value)``
    * ``"file_size_min"`` -> ``FileSizeRange(min=value)``
    * ``"file_size_max"`` -> ``FileSizeRange(max=value)``

    Items with any other ``"type"`` are skipped without an error.

    Args:
        filters: Raw filter descriptions, in the order they should be emitted.

    Returns:
        The constructed filter objects, in the same order as the recognised
        input items.

    Raises:
        KeyError: If an item lacks the ``"type"`` or ``"value"`` key. Both keys
            are read before the type is inspected, so this applies to
            unrecognised types as well.
    """
    processed_filters: list[Glob | Regexp | FileSizeRange] = []
    # Named ``filter_spec`` rather than ``filter`` to avoid shadowing the builtin.
    for filter_spec in filters:
        filter_type = filter_spec["type"]
        value = filter_spec["value"]

        if filter_type == "name_glob":
            processed_filters.append(Glob(value))
        elif filter_type == "name_regexp":
            processed_filters.append(Regexp(value))
        elif filter_type == "file_size_min":
            processed_filters.append(FileSizeRange(min=value))
        elif filter_type == "file_size_max":
            processed_filters.append(FileSizeRange(max=value))
        # Any other filter type is intentionally left out of the result.

    return processed_filters
87+
def _get_file_metadata_filters(self) -> list[Glob | Regexp | FileSizeRange]:
    """Build the file filters configured on this transfer.

    Walks ``self.transfer_dto.transformations``, keeps only the entries whose
    ``"type"`` is ``"file_metadata_filter"``, concatenates their ``"filters"``
    lists in order, and hands the combined list to
    ``self._make_file_metadata_filters``.

    Returns:
        Whatever ``self._make_file_metadata_filters`` produces for the
        collected raw filter descriptions.
    """
    raw_filters = [
        raw_filter
        for step in self.transfer_dto.transformations
        if step["type"] == "file_metadata_filter"
        for raw_filter in step["filters"]
    ]
    return self._make_file_metadata_filters(raw_filters)
0 commit comments