@@ -17,14 +17,16 @@ use crate::queryplanner::serialized_plan::{IndexSnapshot, RowFilter, RowRange, S
1717use crate :: queryplanner:: trace_data_loaded:: DataLoadedSize ;
1818use crate :: store:: DataFrame ;
1919use crate :: table:: data:: rows_to_columns;
20- use crate :: table:: parquet:: { parquet_source , CubestoreParquetMetadataCache } ;
20+ use crate :: table:: parquet:: CubestoreParquetMetadataCache ;
2121use crate :: table:: { Row , TableValue , TimestampValue } ;
2222use crate :: telemetry:: suboptimal_query_plan_event;
2323use crate :: util:: memory:: MemoryHandler ;
2424use crate :: { app_metrics, CubeError } ;
2525use async_trait:: async_trait;
26+ use datafusion:: config:: TableParquetOptions ;
2627use datafusion:: physical_plan:: execution_plan:: { Boundedness , EmissionType } ;
2728use datafusion_datasource:: memory:: MemoryExec ;
29+ use datafusion_datasource:: source:: DataSourceExec ;
2830use core:: fmt;
2931use datafusion:: arrow:: array:: {
3032 make_array, Array , ArrayRef , BinaryArray , BooleanArray , Decimal128Array , Float64Array ,
@@ -40,9 +42,9 @@ use datafusion::catalog::Session;
4042use datafusion:: common:: ToDFSchema ;
4143use datafusion:: datasource:: listing:: PartitionedFile ;
4244use datafusion:: datasource:: object_store:: ObjectStoreUrl ;
43- use datafusion:: datasource:: physical_plan:: parquet:: ParquetExecBuilder ;
45+ use datafusion:: datasource:: physical_plan:: parquet:: get_reader_options_customizer ;
4446use datafusion:: datasource:: physical_plan:: {
45- FileScanConfig , ParquetExec , ParquetFileReaderFactory , ParquetSource ,
47+ FileScanConfig , ParquetFileReaderFactory , ParquetSource ,
4648} ;
4749use datafusion:: datasource:: { TableProvider , TableType } ;
4850use datafusion:: error:: DataFusionError ;
@@ -401,7 +403,7 @@ impl QueryExecutorImpl {
401403 serialized_plan : Arc < PreSerializedPlan > ,
402404 ) -> Result < Arc < SessionContext > , CubeError > {
403405 let runtime = Arc :: new ( RuntimeEnv :: default ( ) ) ;
404- let config = Self :: session_config ( ) ;
406+ let config = self . session_config ( ) ;
405407 let session_state = SessionStateBuilder :: new ( )
406408 . with_config ( config)
407409 . with_runtime_env ( runtime)
@@ -455,7 +457,7 @@ impl QueryExecutorImpl {
455457 data_loaded_size : Option < Arc < DataLoadedSize > > ,
456458 ) -> Result < Arc < SessionContext > , CubeError > {
457459 let runtime = Arc :: new ( RuntimeEnv :: default ( ) ) ;
458- let config = Self :: session_config ( ) ;
460+ let config = self . session_config ( ) ;
459461 let session_state = SessionStateBuilder :: new ( )
460462 . with_config ( config)
461463 . with_runtime_env ( runtime)
@@ -474,8 +476,8 @@ impl QueryExecutorImpl {
474476 Ok ( Arc :: new ( ctx) )
475477 }
476478
477- fn session_config ( ) -> SessionConfig {
478- let mut config = SessionConfig :: new ( )
479+ fn session_config ( & self ) -> SessionConfig {
480+ let mut config = self . metadata_cache_factory . make_session_config ( )
479481 . with_batch_size ( 4096 )
480482 // TODO upgrade DF if less than 2 then there will be no MergeJoin. Decide on repartitioning.
481483 . with_target_partitions ( 2 )
@@ -693,8 +695,16 @@ impl CubeTable {
693695 . get ( remote_path. as_str ( ) )
694696 . expect ( format ! ( "Missing remote path {}" , remote_path) . as_str ( ) ) ;
695697
698+ let parquet_source = ParquetSource :: new ( TableParquetOptions :: default ( ) , get_reader_options_customizer ( state. config ( ) ) )
699+ . with_parquet_file_reader_factory ( self . parquet_metadata_cache . clone ( ) ) ;
700+ let parquet_source = if let Some ( phys_pred) = & physical_predicate {
701+ parquet_source. with_predicate ( index_schema. clone ( ) , phys_pred. clone ( ) )
702+ } else {
703+ parquet_source
704+ } ;
705+
696706 let file_scan =
697- FileScanConfig :: new ( ObjectStoreUrl :: local_filesystem ( ) , index_schema. clone ( ) , parquet_source ( ) )
707+ FileScanConfig :: new ( ObjectStoreUrl :: local_filesystem ( ) , index_schema. clone ( ) , Arc :: new ( parquet_source ) )
698708 . with_file ( PartitionedFile :: from_path ( local_path. to_string ( ) ) ?)
699709 . with_projection ( index_projection_or_none_on_schema_match. clone ( ) )
700710 . with_output_ordering ( vec ! [ LexOrdering :: new( ( 0 ..key_len)
@@ -710,16 +720,11 @@ impl CubeTable {
710720 ) )
711721 } )
712722 . collect:: <Result <Vec <_>, _>>( ) ?) ] ) ;
713- let parquet_exec_builder = ParquetExecBuilder :: new ( file_scan)
714- . with_parquet_file_reader_factory ( self . parquet_metadata_cache . clone ( ) ) ;
715- let parquet_exec_builder = if let Some ( phys_pred) = & physical_predicate {
716- parquet_exec_builder. with_predicate ( phys_pred. clone ( ) )
717- } else {
718- parquet_exec_builder
719- } ;
720- let parquet_exec = parquet_exec_builder. build ( ) ;
721723
722- let arc: Arc < dyn ExecutionPlan > = Arc :: new ( parquet_exec) ;
724+
725+ let data_source_exec = DataSourceExec :: new ( Arc :: new ( file_scan) ) ;
726+
727+ let arc: Arc < dyn ExecutionPlan > = Arc :: new ( data_source_exec) ;
723728 let arc = FilterByKeyRangeExec :: issue_filters ( arc, filter. clone ( ) , key_len) ;
724729 partition_execs. push ( arc) ;
725730 }
@@ -763,7 +768,15 @@ impl CubeTable {
763768 . get ( & remote_path)
764769 . expect ( format ! ( "Missing remote path {}" , remote_path) . as_str ( ) ) ;
765770
766- let file_scan = FileScanConfig :: new ( ObjectStoreUrl :: local_filesystem ( ) , index_schema. clone ( ) , parquet_source ( ) )
771+ let parquet_source = ParquetSource :: new ( TableParquetOptions :: default ( ) , get_reader_options_customizer ( state. config ( ) ) )
772+ . with_parquet_file_reader_factory ( self . parquet_metadata_cache . clone ( ) ) ;
773+ let parquet_source = if let Some ( phys_pred) = & physical_predicate {
774+ parquet_source. with_predicate ( index_schema. clone ( ) , phys_pred. clone ( ) )
775+ } else {
776+ parquet_source
777+ } ;
778+
779+ let file_scan = FileScanConfig :: new ( ObjectStoreUrl :: local_filesystem ( ) , index_schema. clone ( ) , Arc :: new ( parquet_source) )
767780 . with_file ( PartitionedFile :: from_path ( local_path. to_string ( ) ) ?)
768781 . with_projection ( index_projection_or_none_on_schema_match. clone ( ) )
769782 . with_output_ordering ( vec ! [ LexOrdering :: new( ( 0 ..key_len) . map( |i| -> Result <_, DataFusionError > { Ok ( PhysicalSortExpr :: new(
@@ -773,16 +786,9 @@ impl CubeTable {
773786 SortOptions :: default ( ) ,
774787 ) ) } ) . collect:: <Result <Vec <_>, _>>( ) ?) ] )
775788 ;
776- let parquet_exec_builder = ParquetExecBuilder :: new ( file_scan)
777- . with_parquet_file_reader_factory ( self . parquet_metadata_cache . clone ( ) ) ;
778- let parquet_exec_builder = if let Some ( phys_pred) = & physical_predicate {
779- parquet_exec_builder. with_predicate ( phys_pred. clone ( ) )
780- } else {
781- parquet_exec_builder
782- } ;
783- let parquet_exec = parquet_exec_builder. build ( ) ;
784789
785- Arc :: new ( parquet_exec)
790+ let data_source_exec = DataSourceExec :: new ( Arc :: new ( file_scan) ) ;
791+ Arc :: new ( data_source_exec)
786792 } ;
787793
788794 let node = FilterByKeyRangeExec :: issue_filters ( node, filter. clone ( ) , key_len) ;
0 commit comments