@@ -27,8 +27,6 @@ pub fn tpch_query_from_dir(queries_dir: &std::path::Path, num: u8) -> String {
2727}
2828pub const NUM_QUERIES : u8 = 22 ; // number of queries in the TPCH benchmark numbered from 1 to 22
2929
30- const SCALE_FACTOR : f64 = 0.001 ;
31-
3230pub fn tpch_table ( name : & str ) -> Arc < dyn TableProvider > {
3331 let schema = Arc :: new ( get_tpch_table_schema ( name) ) ;
3432 Arc :: new ( MemTable :: try_new ( schema, vec ! [ ] ) . unwrap ( ) )
@@ -158,33 +156,33 @@ where
158156 Ok ( ( ) )
159157}
160158
161- macro_rules! must_generate_tpch_table {
162- ( $generator: ident, $arrow: ident, $name: literal, $data_dir: expr) => {
163- let data_dir = $data_dir. join( $name) ;
164- fs:: create_dir_all( data_dir. clone( ) ) . expect( "Failed to create data directory" ) ;
165- // create three partitions for the table
166- ( 1 ..=3 ) . for_each( |part| {
167- generate_table(
168- // TODO: Consider adjusting the partitions and batch sizes.
169- $arrow:: new( $generator:: new( SCALE_FACTOR , part, 3 ) ) . with_batch_size( 1000 ) ,
170- & format!( "{part}" ) ,
171- & data_dir. clone( ) . into_boxed_path( ) ,
172- )
173- . expect( concat!( "Failed to generate " , $name, " table" ) ) ;
174- } ) ;
175- } ;
176- }
177-
178159// generate_tpch_data generates all TPC-H tables under the specified data directory, one subdirectory per table, at scale factor `sf` split into `parts` partitions. Panics if a directory cannot be created or a table fails to generate.
179- pub fn generate_tpch_data ( data_dir : & std:: path:: Path ) {
160+ pub fn generate_tpch_data ( data_dir : & std:: path:: Path , sf : f64 , parts : i32 ) {
180161 fs:: create_dir_all ( data_dir) . expect ( "Failed to create data directory" ) ;
181162
182- must_generate_tpch_table ! ( RegionGenerator , RegionArrow , "region" , data_dir) ;
183- must_generate_tpch_table ! ( NationGenerator , NationArrow , "nation" , data_dir) ;
184- must_generate_tpch_table ! ( CustomerGenerator , CustomerArrow , "customer" , data_dir) ;
185- must_generate_tpch_table ! ( SupplierGenerator , SupplierArrow , "supplier" , data_dir) ;
186- must_generate_tpch_table ! ( PartGenerator , PartArrow , "part" , data_dir) ;
187- must_generate_tpch_table ! ( PartSuppGenerator , PartSuppArrow , "partsupp" , data_dir) ;
188- must_generate_tpch_table ! ( OrderGenerator , OrderArrow , "orders" , data_dir) ;
189- must_generate_tpch_table ! ( LineItemGenerator , LineItemArrow , "lineitem" , data_dir) ;
163+ macro_rules! must_generate_tpch_table {
164+ ( $generator: ident, $arrow: ident, $name: literal) => {
165+ let data_dir = data_dir. join( $name) ;
166+ fs:: create_dir_all( data_dir. clone( ) ) . expect( "Failed to create data directory" ) ;
167+ // create `parts` partitions for the table (the range is empty, so nothing is generated, when parts < 1)
168+ ( 1 ..=parts) . for_each( |part| {
169+ generate_table(
170+ // TODO: Consider making the batch size configurable too; the partition count now comes from `parts`.
171+ $arrow:: new( $generator:: new( sf, part, parts) ) . with_batch_size( 1000 ) ,
172+ & format!( "{part}" ) ,
173+ & data_dir,
174+ )
175+ . expect( concat!( "Failed to generate " , $name, " table" ) ) ;
176+ } ) ;
177+ } ;
178+ }
179+
180+ must_generate_tpch_table ! ( RegionGenerator , RegionArrow , "region" ) ;
181+ must_generate_tpch_table ! ( NationGenerator , NationArrow , "nation" ) ;
182+ must_generate_tpch_table ! ( CustomerGenerator , CustomerArrow , "customer" ) ;
183+ must_generate_tpch_table ! ( SupplierGenerator , SupplierArrow , "supplier" ) ;
184+ must_generate_tpch_table ! ( PartGenerator , PartArrow , "part" ) ;
185+ must_generate_tpch_table ! ( PartSuppGenerator , PartSuppArrow , "partsupp" ) ;
186+ must_generate_tpch_table ! ( OrderGenerator , OrderArrow , "orders" ) ;
187+ must_generate_tpch_table ! ( LineItemGenerator , LineItemArrow , "lineitem" ) ;
190188}
0 commit comments