diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
index 81b89e5750d83..3f83a8dc4dddc 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
@@ -157,4 +157,11 @@ enum ColumnarSupportMode {
   default ColumnarSupportMode columnarSupportMode() {
     return ColumnarSupportMode.PARTITION_DEFINED;
   }
+
+  /**
+   * Returns the canonicalized scan.
+   *
+   * @since 4.1.0
+   */
+  default Scan doCanonicalize() { return this; }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
index 26f4069994943..2dcaf29f980f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation}
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Expression, SortOrder}
+import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, ExposesMetadataColumns, Histogram, HistogramBin, LeafNode, LogicalPlan, Statistics}
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, truncatedString, CharVarcharUtils}
@@ -163,6 +164,17 @@ case class DataSourceV2ScanRelation(
       Statistics(sizeInBytes = conf.defaultSizeInBytes)
     }
   }
+
+  override def doCanonicalize(): LogicalPlan = {
+    val canonicalized = this.copy(
+      relation = this.relation.copy(
+        output = this.relation.output.map(QueryPlan.normalizeExpressions(_, this.relation.output))
+      ),
+      output = this.output.map(QueryPlan.normalizeExpressions(_, this.output)),
+      scan = this.scan.doCanonicalize()
+    )
+    canonicalized
+  }
 }
 
 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
index 55866cc858405..33dd98d0e37fa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
@@ -149,6 +149,7 @@ case class BatchScanExec(
   override def doCanonicalize(): BatchScanExec = {
     this.copy(
+      scan = scan.doCanonicalize(),
       output = output.map(QueryPlan.normalizeExpressions(_, output)),
       runtimeFilters = QueryPlan.normalizePredicates(
         runtimeFilters.filterNot(_ == DynamicPruningExpression(Literal.TrueLiteral)),