11use async_stream:: try_stream;
2- use globset:: { Glob , GlobSet , GlobSetBuilder } ;
32use log:: warn;
43use std:: borrow:: Cow ;
5- use std:: path:: Path ;
64use std:: { path:: PathBuf , sync:: Arc } ;
75
6+ use super :: shared:: pattern_matcher:: PatternMatcher ;
87use crate :: base:: field_attrs;
98use crate :: { fields_value, ops:: sdk:: * } ;
109
@@ -19,23 +18,7 @@ pub struct Spec {
1918struct Executor {
2019 root_path : PathBuf ,
2120 binary : bool ,
22- included_glob_set : Option < GlobSet > ,
23- excluded_glob_set : Option < GlobSet > ,
24- }
25-
26- impl Executor {
27- fn is_excluded ( & self , path : impl AsRef < Path > + Copy ) -> bool {
28- self . excluded_glob_set
29- . as_ref ( )
30- . is_some_and ( |glob_set| glob_set. is_match ( path) )
31- }
32-
33- fn is_file_included ( & self , path : impl AsRef < Path > + Copy ) -> bool {
34- self . included_glob_set
35- . as_ref ( )
36- . is_none_or ( |glob_set| glob_set. is_match ( path) )
37- && !self . is_excluded ( path)
38- }
21+ pattern_matcher : PatternMatcher ,
3922}
4023
4124#[ async_trait]
@@ -57,26 +40,25 @@ impl SourceExecutor for Executor {
5740 for _ in 0 ..root_component_size {
5841 path_components. next( ) ;
5942 }
60- let relative_path = path_components. as_path( ) ;
43+ let Some ( relative_path) = path_components. as_path( ) . to_str( ) else {
44+ warn!( "Skipped ill-formed file path: {}" , path. display( ) ) ;
45+ continue ;
46+ } ;
6147 if path. is_dir( ) {
62- if !self . is_excluded( relative_path) {
48+ if !self . pattern_matcher . is_excluded( relative_path) {
6349 new_dirs. push( Cow :: Owned ( path) ) ;
6450 }
65- } else if self . is_file_included( relative_path) {
51+ } else if self . pattern_matcher . is_file_included( relative_path) {
6652 let ordinal: Option <Ordinal > = if options. include_ordinal {
6753 Some ( path. metadata( ) ?. modified( ) ?. try_into( ) ?)
6854 } else {
6955 None
7056 } ;
71- if let Some ( relative_path) = relative_path. to_str( ) {
72- yield vec![ PartialSourceRowMetadata {
73- key: KeyValue :: Str ( relative_path. into( ) ) ,
74- key_aux_info: serde_json:: Value :: Null ,
75- ordinal,
76- } ] ;
77- } else {
78- warn!( "Skipped ill-formed file path: {}" , path. display( ) ) ;
79- }
57+ yield vec![ PartialSourceRowMetadata {
58+ key: KeyValue :: Str ( relative_path. into( ) ) ,
59+ key_aux_info: serde_json:: Value :: Null ,
60+ ordinal,
61+ } ] ;
8062 }
8163 }
8264 dirs. extend( new_dirs. drain( ..) . rev( ) ) ;
@@ -91,7 +73,10 @@ impl SourceExecutor for Executor {
9173 _key_aux_info : & serde_json:: Value ,
9274 options : & SourceExecutorGetOptions ,
9375 ) -> Result < PartialSourceRowData > {
94- if !self . is_file_included ( key. str_value ( ) ?. as_ref ( ) ) {
76+ if !self
77+ . pattern_matcher
78+ . is_file_included ( key. str_value ( ) ?. as_ref ( ) )
79+ {
9580 return Ok ( PartialSourceRowData {
9681 value : Some ( SourceValue :: NonExistence ) ,
9782 ordinal : Some ( Ordinal :: unavailable ( ) ) ,
@@ -173,16 +158,7 @@ impl SourceFactoryBase for Factory {
173158 Ok ( Box :: new ( Executor {
174159 root_path : PathBuf :: from ( spec. path ) ,
175160 binary : spec. binary ,
176- included_glob_set : spec. included_patterns . map ( build_glob_set) . transpose ( ) ?,
177- excluded_glob_set : spec. excluded_patterns . map ( build_glob_set) . transpose ( ) ?,
161+ pattern_matcher : PatternMatcher :: new ( spec. included_patterns , spec. excluded_patterns ) ?,
178162 } ) )
179163 }
180164}
181-
182- fn build_glob_set ( patterns : Vec < String > ) -> Result < GlobSet > {
183- let mut builder = GlobSetBuilder :: new ( ) ;
184- for pattern in patterns {
185- builder. add ( Glob :: new ( pattern. as_str ( ) ) ?) ;
186- }
187- Ok ( builder. build ( ) ?)
188- }
0 commit comments