1- use std:: sync :: atomic :: AtomicBool ;
1+ use std:: path :: PathBuf ;
22
3+ use crossbeam_queue:: SegQueue ;
34use vortex:: error:: { VortexResult , vortex_bail, vortex_err} ;
45use vortex:: file:: { VortexFile , VortexOpenOptions } ;
56
@@ -9,14 +10,13 @@ use crate::duckdb::{
910use crate :: exporter:: ArrayIteratorExporter ;
1011
1112pub struct VortexBindData {
12- first_file : VortexFile ,
13+ _first_file : VortexFile ,
14+ file_list : SegQueue < PathBuf > ,
1315 _column_names : Vec < String > ,
1416 _column_types : Vec < LogicalType > ,
1517}
1618
17- pub struct VortexGlobalData {
18- done : AtomicBool ,
19- }
19+ pub struct VortexGlobalData { }
2020
2121pub struct VortexLocalData {
2222 exporter : Option < ArrayIteratorExporter > ,
@@ -63,7 +63,6 @@ impl TableFunction for VortexTableFunction {
6363 }
6464
6565 fn bind ( input : & BindInput , result : & mut BindResult ) -> VortexResult < Self :: BindData > {
66- // TODO: expand glob & assign to file list
6766 let file_glob_string = input
6867 . get_parameter ( 0 )
6968 . ok_or_else ( || vortex_err ! ( "Missing file glob parameter" ) ) ?;
@@ -80,8 +79,22 @@ impl TableFunction for VortexTableFunction {
8079 result. add_result_column ( name, logical_type) ;
8180 }
8281
82+ let paths = match glob:: glob ( file_glob_string. as_string ( ) . to_str ( ) ?) {
83+ Ok ( paths) => paths,
84+ Err ( e) => vortex_bail ! ( "Failed to glob files: {}" , e) ,
85+ } ;
86+
87+ let file_list = SegQueue :: new ( ) ;
88+ for path in paths {
89+ match path {
90+ Ok ( path) => file_list. push ( path) ,
91+ Err ( e) => vortex_bail ! ( "Failed to glob files: {}" , e) ,
92+ }
93+ }
94+
8395 Ok ( VortexBindData {
84- first_file,
96+ file_list,
97+ _first_file : first_file,
8598 _column_names : column_names,
8699 _column_types : column_types,
87100 } )
@@ -90,23 +103,26 @@ impl TableFunction for VortexTableFunction {
90103 fn scan (
91104 bind_data : & Self :: BindData ,
92105 local_state : & mut Self :: LocalState ,
93- global_state : & mut Self :: GlobalState ,
106+ _global_state : & mut Self :: GlobalState ,
94107 chunk : & mut DataChunk ,
95108 ) -> VortexResult < ( ) > {
96- if global_state. done . load ( std:: sync:: atomic:: Ordering :: SeqCst ) {
97- // Signal to DuckDB that there's no work left by setting the chunk length to 0.
98- chunk. set_len ( 0 ) ;
99- return Ok ( ( ) ) ;
100- }
101-
102109 if local_state. exporter . is_none ( ) {
103- let array_iter = bind_data
104- . first_file
105- . scan ( ) ?
106- . into_array_iter ( )
107- . map_err ( |e| vortex_err ! ( "Failed to create array iterator: {}" , e) ) ?;
110+ if let Some ( file_path) = bind_data. file_list . pop ( ) {
111+ let file = VortexOpenOptions :: file ( )
112+ . open_blocking ( & file_path)
113+ . map_err ( |e| vortex_err ! ( "Failed to open Vortex file: {}" , e) ) ?;
108114
109- local_state. exporter = Some ( ArrayIteratorExporter :: new ( Box :: new ( array_iter) ) ) ;
115+ let array_iter = file
116+ . scan ( ) ?
117+ . into_array_iter ( )
118+ . map_err ( |e| vortex_err ! ( "Failed to create array iterator: {}" , e) ) ?;
119+
120+ local_state. exporter = Some ( ArrayIteratorExporter :: new ( Box :: new ( array_iter) ) ) ;
121+ } else {
122+ // If the exporter is None and there are no more files to process, signal that the scan finished.
123+ chunk. set_len ( 0 ) ;
124+ return Ok ( ( ) ) ;
125+ }
110126 }
111127
112128 let Some ( ref mut exporter) = local_state. exporter else {
@@ -118,18 +134,14 @@ impl TableFunction for VortexTableFunction {
118134 . map_err ( |e| vortex_err ! ( "Failed to export data: {}" , e) ) ?;
119135
120136 if !is_data_left_to_scan {
121- global_state
122- . done
123- . store ( true , std:: sync:: atomic:: Ordering :: SeqCst ) ;
137+ local_state. exporter = None ;
124138 }
125139
126140 Ok ( ( ) )
127141 }
128142
129143 fn init_global ( _init : & TableInitInput < Self > ) -> VortexResult < Self :: GlobalState > {
130- Ok ( VortexGlobalData {
131- done : AtomicBool :: new ( false ) ,
132- } )
144+ Ok ( VortexGlobalData { } )
133145 }
134146
135147 fn init_local (
@@ -146,3 +158,113 @@ impl TableFunction for VortexTableFunction {
146158 Ok ( false )
147159 }
148160}
161+
162+ #[ cfg( test) ]
163+ mod tests {
164+ use duckdb:: Connection ;
165+ use tempfile:: NamedTempFile ;
166+ use vortex:: IntoArray ;
167+ use vortex:: arrays:: { BoolArray , ConstantArray , PrimitiveArray , StructArray , VarBinArray } ;
168+ use vortex:: file:: VortexWriteOptions ;
169+ use vortex:: scalar:: Scalar ;
170+ use vortex:: validity:: Validity ;
171+
172+ use super :: * ;
173+ use crate :: duckdb:: Database ;
174+
175+ fn database_connection ( ) -> Connection {
176+ let db = Database :: open_in_memory ( ) . unwrap ( ) ;
177+ let connection = db. connect ( ) . unwrap ( ) ;
178+ connection
179+ . register_table_function :: < VortexTableFunction > ( c"vortex_scan" )
180+ . unwrap ( ) ;
181+ unsafe { Connection :: open_from_raw ( db. as_ptr ( ) . cast ( ) ) } . unwrap ( )
182+ }
183+
184+ fn create_temp_file ( ) -> NamedTempFile {
185+ NamedTempFile :: new ( ) . unwrap ( )
186+ }
187+
188+ async fn write_vortex_file ( field_name : & str , array : impl IntoArray ) -> NamedTempFile {
189+ let temp_file_path = create_temp_file ( ) ;
190+
191+ let struct_array = StructArray :: from_fields ( & [ ( field_name, array. into_array ( ) ) ] ) . unwrap ( ) ;
192+ let file = tokio:: fs:: File :: create ( & temp_file_path) . await . unwrap ( ) ;
193+ VortexWriteOptions :: default ( )
194+ . write ( file, struct_array. to_array_stream ( ) )
195+ . await
196+ . unwrap ( ) ;
197+
198+ temp_file_path
199+ }
200+
201+ fn scan_vortex_file < T > ( tmp_file : NamedTempFile , query : & str ) -> T
202+ where
203+ T : duckdb:: types:: FromSql ,
204+ {
205+ let conn = database_connection ( ) ;
206+ conn. prepare ( query)
207+ . unwrap ( )
208+ . query_row ( [ tmp_file. path ( ) . to_string_lossy ( ) ] , |row| row. get ( 0 ) )
209+ . unwrap ( )
210+ }
211+
212+ #[ test]
213+ fn test_scan_function_registration ( ) {
214+ let conn = database_connection ( ) ;
215+ let result: String = conn
216+ . prepare (
217+ "SELECT function_name FROM duckdb_functions() WHERE function_name = 'vortex_scan'" ,
218+ )
219+ . unwrap ( )
220+ . query_row ( [ ] , |row| row. get ( 0 ) )
221+ . unwrap ( ) ;
222+ assert_eq ! ( & result, "vortex_scan" ) ;
223+ }
224+
225+ #[ tokio:: test]
226+ async fn test_vortex_scan_strings ( ) {
227+ let strings = VarBinArray :: from ( vec ! [ "Hello" , "Hi" , "Hey" ] ) ;
228+ let file = write_vortex_file ( "strings" , strings) . await ;
229+ let result: String =
230+ scan_vortex_file ( file, "SELECT string_agg(strings, ',') FROM vortex_scan(?)" ) ;
231+ assert_eq ! ( result, "Hello,Hi,Hey" ) ;
232+ }
233+
234+ #[ tokio:: test]
235+ async fn test_vortex_scan_integers ( ) {
236+ let numbers = PrimitiveArray :: from_iter ( [ 1i32 , 42 , 100 , -5 , 0 ] ) ;
237+ let file = write_vortex_file ( "number" , numbers) . await ;
238+ let sum: i64 = scan_vortex_file ( file, "SELECT SUM(number) FROM vortex_scan(?)" ) ;
239+ assert_eq ! ( sum, 138 ) ;
240+ }
241+
242+ #[ tokio:: test]
243+ async fn test_vortex_scan_floats ( ) {
244+ let values = PrimitiveArray :: from_iter ( [ 1.5f64 , -2.5 , 0.0 , 42.42 ] ) ;
245+ let file = write_vortex_file ( "value" , values) . await ;
246+ let count: i64 =
247+ scan_vortex_file ( file, "SELECT COUNT(*) FROM vortex_scan(?) WHERE value > 0" ) ;
248+ assert_eq ! ( count, 2 ) ;
249+ }
250+
251+ #[ tokio:: test]
252+ async fn test_vortex_scan_constant ( ) {
253+ let constant = ConstantArray :: new ( Scalar :: from ( 42i32 ) , 100 ) ;
254+ let file = write_vortex_file ( "constant" , constant) . await ;
255+ let value: i32 = scan_vortex_file ( file, "SELECT constant FROM vortex_scan(?) LIMIT 1" ) ;
256+ assert_eq ! ( value, 42 ) ;
257+ }
258+
259+ #[ tokio:: test]
260+ async fn test_vortex_scan_booleans ( ) {
261+ let flags = vec ! [ true , false , true , true , false ] ;
262+ let flags_array = BoolArray :: new ( flags. into ( ) , Validity :: NonNullable ) ;
263+ let file = write_vortex_file ( "flag" , flags_array) . await ;
264+ let true_count: i64 = scan_vortex_file (
265+ file,
266+ "SELECT COUNT(*) FROM vortex_scan(?) WHERE flag = true" ,
267+ ) ;
268+ assert_eq ! ( true_count, 3 ) ;
269+ }
270+ }
0 commit comments