@@ -22,28 +22,24 @@ use databend_common_base::runtime::profile::Profile;
2222use databend_common_base:: runtime:: profile:: ProfileStatisticsName ;
2323use databend_common_catalog:: table_context:: TableContext ;
2424use databend_common_exception:: Result ;
25- use databend_common_expression:: BlockEntry ;
2625use databend_common_expression:: DataBlock ;
2726use databend_common_expression:: DataSchemaRef ;
28- use databend_common_expression:: Evaluator ;
29- use databend_common_expression:: FunctionContext ;
30- use databend_common_functions:: BUILTIN_FUNCTIONS ;
27+ use databend_common_expression:: Expr ;
3128use databend_common_pipeline_core:: processors:: Event ;
3229use databend_common_pipeline_core:: processors:: OutputPort ;
3330use databend_common_pipeline_core:: processors:: Processor ;
3431use databend_common_pipeline_core:: processors:: ProcessorPtr ;
3532use opendal:: Operator ;
3633
34+ use crate :: copy_into_table:: projection:: CopyProjectionEvaluator ;
3735use crate :: copy_into_table:: reader:: RowGroupReaderForCopy ;
3836use crate :: parquet_reader:: policy:: ReadPolicyImpl ;
3937use crate :: read_settings:: ReadSettings ;
4038use crate :: ParquetPart ;
4139
42- type SchemaIndex = usize ;
43-
4440enum State {
4541 Init ,
46- ReadRowGroup ( ( SchemaIndex , ReadPolicyImpl ) ) ,
42+ ReadRowGroup ( ( Vec < Expr > , ReadPolicyImpl ) ) ,
4743}
4844
4945pub struct ParquetCopySource {
@@ -59,9 +55,7 @@ pub struct ParquetCopySource {
5955 // Used to read parquet.
6056 row_group_readers : Arc < HashMap < usize , RowGroupReaderForCopy > > ,
6157 operator : Operator ,
62- schema : DataSchemaRef ,
63- func_ctx : FunctionContext ,
64-
58+ copy_projection_evaluator : CopyProjectionEvaluator ,
6559 state : State ,
6660 batch_size : usize ,
6761}
@@ -76,20 +70,20 @@ impl ParquetCopySource {
7670 ) -> Result < ProcessorPtr > {
7771 let scan_progress = ctx. get_scan_progress ( ) ;
7872 let batch_size = ctx. get_settings ( ) . get_parquet_max_block_size ( ) ? as usize ;
79- let func_ctx = ctx. get_function_context ( ) ?;
73+ let func_ctx = Arc :: new ( ctx. get_function_context ( ) ?) ;
74+ let copy_projection_evaluator = CopyProjectionEvaluator :: new ( schema, func_ctx) ;
8075
8176 Ok ( ProcessorPtr :: create ( Box :: new ( Self {
8277 output,
8378 scan_progress,
8479 ctx,
8580 operator,
8681 row_group_readers,
87- func_ctx,
8882 batch_size,
8983 generated_data : None ,
9084 is_finished : false ,
9185 state : State :: Init ,
92- schema ,
86+ copy_projection_evaluator ,
9387 } ) ) )
9488 }
9589}
@@ -141,23 +135,13 @@ impl Processor for ParquetCopySource {
141135
142136 fn process ( & mut self ) -> Result < ( ) > {
143137 match std:: mem:: replace ( & mut self . state , State :: Init ) {
144- State :: ReadRowGroup ( ( schema_index , mut reader) ) => {
138+ State :: ReadRowGroup ( ( projection , mut reader) ) => {
145139 if let Some ( block) = reader. as_mut ( ) . read_block ( ) ? {
146- let projection = self
147- . row_group_readers
148- . get ( & schema_index)
149- . unwrap ( )
150- . output_projection ( ) ;
151- let evaluator = Evaluator :: new ( & block, & self . func_ctx , & BUILTIN_FUNCTIONS ) ;
152- let mut columns = Vec :: with_capacity ( projection. len ( ) ) ;
153- for ( field, expr) in self . schema . fields ( ) . iter ( ) . zip ( projection. iter ( ) ) {
154- let value = evaluator. run ( expr) ?;
155- let column = BlockEntry :: new ( field. data_type ( ) . clone ( ) , value) ;
156- columns. push ( column) ;
157- }
158- let block = DataBlock :: new ( columns, block. num_rows ( ) ) ;
159- self . generated_data = Some ( block) ;
160- self . state = State :: ReadRowGroup ( ( schema_index, reader) ) ;
140+ self . generated_data = Some (
141+ self . copy_projection_evaluator
142+ . project ( & block, & projection) ?,
143+ ) ;
144+ self . state = State :: ReadRowGroup ( ( projection, reader) ) ;
161145 }
162146 // Else: The reader is finished. We should try to build another reader.
163147 }
@@ -178,6 +162,7 @@ impl Processor for ParquetCopySource {
178162 . row_group_readers
179163 . get ( & schema_index)
180164 . expect ( "schema index must exist" ) ;
165+ let projection = builder. output_projection ( ) . to_vec ( ) ;
181166 let reader = builder
182167 . build_reader (
183168 part,
@@ -188,7 +173,7 @@ impl Processor for ParquetCopySource {
188173 . await ?
189174 . expect ( "reader must exist" ) ;
190175 {
191- self . state = State :: ReadRowGroup ( ( schema_index , reader) ) ;
176+ self . state = State :: ReadRowGroup ( ( projection , reader) ) ;
192177 }
193178 // Else: keep in init state.
194179 }
@@ -200,7 +185,6 @@ impl Processor for ParquetCopySource {
200185 }
201186 _ => unreachable ! ( ) ,
202187 }
203-
204188 Ok ( ( ) )
205189 }
206190}
0 commit comments