 use std::any::Any;
-use std::fmt::Formatter;
 use std::sync::Arc;

-use arrow_schema::SchemaRef;
-use datafusion::execution::{SendableRecordBatchStream, TaskContext};
-use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan};
+use arrow_schema::{Schema, SchemaRef};
+use async_trait::async_trait;
+use datafusion::catalog::Session;
+use datafusion::catalog::TableProvider;
+use datafusion::execution::SessionState;
+use datafusion_common::{exec_datafusion_err, Column, DFSchema, Result as DataFusionResult};
+use datafusion_expr::utils::conjunction;
+use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType};
+use datafusion_physical_expr::PhysicalExpr;
+use datafusion_physical_plan::filter::FilterExec;
+use datafusion_physical_plan::limit::GlobalLimitExec;
+use datafusion_physical_plan::projection::ProjectionExec;
+use datafusion_physical_plan::ExecutionPlan;

-/// Physical execution of a scan
-#[derive(Debug, Clone)]
-pub struct DeltaCdfScan {
-    plan: Arc<dyn ExecutionPlan>,
-}
+use crate::DeltaTableError;
+use crate::{
+    delta_datafusion::DataFusionMixins, operations::load_cdf::CdfLoadBuilder, DeltaResult,
+};

-impl DeltaCdfScan {
-    /// Creates a new scan
-    pub fn new(plan: Arc<dyn ExecutionPlan>) -> Self {
-        Self { plan }
-    }
+use super::ADD_PARTITION_SCHEMA;
+
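+/// Downcast the generic `Session` handle to the concrete `SessionState`
+/// that `CdfLoadBuilder::build` expects.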
+fn session_state_from_session(session: &dyn Session) -> DataFusionResult<&SessionState> {
+    session
+        .as_any()
+        .downcast_ref::<SessionState>()
+        .ok_or_else(|| exec_datafusion_err!("Failed to downcast Session to SessionState"))
 }

-impl DisplayAs for DeltaCdfScan {
-    fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "{:?}", self)
-    }
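+/// DataFusion `TableProvider` exposing the change data feed (CDF) of a Delta table.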
+#[derive(Debug)]
+pub struct DeltaCdfTableProvider {
+    cdf_builder: CdfLoadBuilder,
+    schema: SchemaRef,
 }

-impl ExecutionPlan for DeltaCdfScan {
-    fn name(&self) -> &str {
-        Self::static_name()
+impl DeltaCdfTableProvider {
+    /// Build a DeltaCdfTableProvider
+    pub fn try_new(cdf_builder: CdfLoadBuilder) -> DeltaResult<Self> {
+        let mut fields = cdf_builder.snapshot.input_schema()?.fields().to_vec();
+        for f in ADD_PARTITION_SCHEMA.clone() {
+            fields.push(f.into());
+        }
+        Ok(DeltaCdfTableProvider {
+            cdf_builder,
+            schema: Schema::new(fields).into(),
+        })
     }
+}

+#[async_trait]
+impl TableProvider for DeltaCdfTableProvider {
     fn as_any(&self) -> &dyn Any {
         self
     }

     fn schema(&self) -> SchemaRef {
-        self.plan.schema().clone()
+        self.schema.clone()
     }

-    fn properties(&self) -> &datafusion::physical_plan::PlanProperties {
-        self.plan.properties()
+    fn table_type(&self) -> TableType {
+        TableType::Base
     }

-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
+    async fn scan(
+        &self,
+        session: &dyn Session,
+        projection: Option<&Vec<usize>>,
+        filters: &[Expr],
+        limit: Option<usize>,
+    ) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
+        let session_state = session_state_from_session(session)?;
+        let mut plan = self.cdf_builder.build(session_state).await?;
+
+        let df_schema: DFSchema = plan.schema().try_into()?;
+
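+        // Apply pushed-down filters on top of the CDF plan; `supports_filters_pushdown`
+        // reports `Exact`, so they must be fully evaluated here.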
+        if let Some(filter_expr) = conjunction(filters.iter().cloned()) {
+            let physical_expr = session.create_physical_expr(filter_expr, &df_schema)?;
+            plan = Arc::new(FilterExec::try_new(physical_expr, plan)?);
+        }
+
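+        // Only materialize a ProjectionExec when the requested columns differ from
+        // the plan's existing column order.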
+        if let Some(projection) = projection {
+            let current_projection = (0..plan.schema().fields().len()).collect::<Vec<usize>>();
+            if projection != &current_projection {
+                let fields: DeltaResult<Vec<(Arc<dyn PhysicalExpr>, String)>> = projection
+                    .iter()
+                    .map(|i| {
+                        let (table_ref, field) = df_schema.qualified_field(*i);
+                        session
+                            .create_physical_expr(
+                                Expr::Column(Column::from((table_ref, field))),
+                                &df_schema,
+                            )
+                            .map(|expr| (expr, field.name().clone()))
+                            .map_err(DeltaTableError::from)
+                    })
+                    .collect();
+                let fields = fields?;
+                plan = Arc::new(ProjectionExec::try_new(fields, plan)?);
+            }
+        }

-    fn with_new_children(
-        self: Arc<Self>,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
-        self.plan.clone().with_new_children(_children)
+        if let Some(limit) = limit {
+            plan = Arc::new(GlobalLimitExec::new(plan, 0, Some(limit)))
+        };
+        Ok(plan)
     }

-    fn execute(
+    fn supports_filters_pushdown(
         &self,
-        partition: usize,
-        context: Arc<TaskContext>,
-    ) -> datafusion_common::Result<SendableRecordBatchStream> {
-        self.plan.execute(partition, context)
+        filter: &[&Expr],
+    ) -> DataFusionResult<Vec<TableProviderFilterPushDown>> {
+        Ok(filter
+            .iter()
+            .map(|_| TableProviderFilterPushDown::Exact) // safe: `scan` re-applies every pushed filter via FilterExec
+            .collect())
     }
 }
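
For context, a minimal sketch of how the new provider could be wired into a DataFusion query. The import path for DeltaCdfTableProvider, the placeholder table URI, and the DeltaOps::load_cdf / with_starting_version builder calls are assumptions based on the surrounding delta-rs and DataFusion APIs, not part of this diff:

use std::sync::Arc;

use datafusion::prelude::SessionContext;
use deltalake::DeltaOps;
// Path assumed; adjust to however DeltaCdfTableProvider is actually re-exported.
use deltalake::delta_datafusion::cdf::scan::DeltaCdfTableProvider;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Placeholder table URI; with_starting_version is assumed from the CdfLoadBuilder API.
    let ops = DeltaOps::try_from_uri("s3://bucket/path/to/table").await?;
    let cdf_builder = ops.load_cdf().with_starting_version(0);

    // Wrap the builder in the provider from this diff and register it with DataFusion.
    let provider = DeltaCdfTableProvider::try_new(cdf_builder)?;
    let ctx = SessionContext::new();
    ctx.register_table("table_changes", Arc::new(provider))?;

    // Filters, projections, and limits in the SQL are pushed through `scan` above.
    ctx.sql("SELECT * FROM table_changes WHERE _change_type = 'insert' LIMIT 10")
        .await?
        .show()
        .await?;

    Ok(())
}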