
Commit c0e1190

v1
1 parent f5e5dda commit c0e1190

17 files changed, +136 -129 lines changed

backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala

Lines changed: 13 additions & 0 deletions
@@ -197,6 +197,19 @@ class VeloxScanSuite extends VeloxWholeStageTransformerSuite {
     }
   }
 
+  test("remove pushed down filter in filter node") {
+    createTPCHNotNullTables()
+    val query = "select l_partkey from lineitem where l_partkey = 1"
+    runQueryAndCompare(query) {
+      df =>
+        {
+          val executedPlan = getExecutedPlan(df)
+          val filter = executedPlan.collect { case f: FilterExecTransformerBase => f }
+          assert(filter.isEmpty)
+        }
+    }
+  }
+
   test("test binary as string") {
     withTempDir {
       dir =>
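
Note: runQueryAndCompare and getExecutedPlan are helpers of the Gluten test suites. A rough equivalent outside the suite, using plain Spark APIs, might look like the following sketch; it is an illustration of what the test asserts, not code from this commit, and it assumes AQE is disabled (with spark.sql.adaptive.enabled=true the final plan would first have to be unwrapped from AdaptiveSparkPlanExec).

// Sketch only (not part of this commit): a predicate that is fully pushed down to the
// native scan should leave no FilterExecTransformerBase node in the executed plan.
import org.apache.gluten.execution.FilterExecTransformerBase

val df = spark.sql("select l_partkey from lineitem where l_partkey = 1")
val remainingFilters = df.queryExecution.executedPlan.collect {
  case f: FilterExecTransformerBase => f
}
assert(remainingFilters.isEmpty, "expected the pushed-down filter to be removed")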

gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala

Lines changed: 3 additions & 0 deletions
@@ -77,6 +77,9 @@ case class DeltaScanTransformer(
       disableBucketedScan
     )
   }
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer =
+    copy(output = newOutput)
 }
 
 object DeltaScanTransformer {

gluten-hudi/src/main/scala/org/apache/gluten/execution/HudiScanTransformer.scala

Lines changed: 3 additions & 0 deletions
@@ -69,6 +69,9 @@ case class HudiScanTransformer(
       disableBucketedScan
     )
   }
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer =
+    copy(output = newOutput)
 }
 
 object HudiScanTransformer {

gluten-iceberg/src/main/scala/org/apache/gluten/execution/IcebergScanTransformer.scala

Lines changed: 4 additions & 1 deletion
@@ -25,7 +25,7 @@ import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
 
 import org.apache.spark.Partition
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, DynamicPruningExpression, Expression, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, DynamicPruningExpression, Expression, Literal}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.read.Scan
@@ -238,6 +238,9 @@ case class IcebergScanTransformer(
       case _ => false
     }
   }
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer =
+    copy(output = newOutput.map(_.asInstanceOf[AttributeReference]))
 }
 
 object IcebergScanTransformer {

gluten-kafka/src/main/scala/org/apache/gluten/execution/MicroBatchScanExecTransformer.scala

Lines changed: 4 additions & 1 deletion
@@ -22,7 +22,7 @@ import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
 
 import org.apache.spark.Partition
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression}
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan}
 import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset}
@@ -96,6 +96,9 @@ case class MicroBatchScanExecTransformer(
     ctx.root.asInstanceOf[ReadRelNode].setStreamKafka(true);
     ctx
   }
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer =
+    copy(output = newOutput.map(_.asInstanceOf[AttributeReference]))
 }
 
 object MicroBatchScanExecTransformer {

gluten-paimon/src-paimon/main/scala/org/apache/gluten/execution/PaimonScanTransformer.scala

Lines changed: 4 additions & 3 deletions
@@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.softaffinity.SoftAffinity
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, DynamicPruningExpression, Expression, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, DynamicPruningExpression, Expression, Literal}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.read.Scan
@@ -74,8 +74,6 @@ case class PaimonScanTransformer(
       throw new GlutenNotSupportException("Only support PaimonScan.")
   }
 
-  override def filterExprs(): Seq[Expression] = pushdownFilters
-
   override def getPartitionSchema: StructType = scan match {
     case paimonScan: PaimonScan =>
       val partitionKeys = paimonScan.table.partitionKeys()
@@ -179,6 +177,9 @@ case class PaimonScanTransformer(
 
   override protected[this] def supportsBatchScan(scan: Scan): Boolean =
     PaimonScanTransformer.supportsBatchScan(scan)
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer =
+    copy(output = newOutput.map(_.asInstanceOf[AttributeReference]))
 }
 
 object PaimonScanTransformer {

gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala

Lines changed: 38 additions & 107 deletions
@@ -17,7 +17,6 @@
 package org.apache.gluten.execution
 
 import org.apache.gluten.backendsapi.BackendsApiManager
-import org.apache.gluten.exception.GlutenNotSupportException
 import org.apache.gluten.expression.{ExpressionConverter, ExpressionTransformer}
 import org.apache.gluten.extension.columnar.transition.Convention
 import org.apache.gluten.metrics.MetricsUpdater
@@ -26,9 +25,9 @@ import org.apache.gluten.substrait.rel.{RelBuilder, RelNode}
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.{PredicateHelper, _}
 import org.apache.spark.sql.execution._
-import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan}
+import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.utils.StructTypeFWD
 import org.apache.spark.sql.vectorized.ColumnarBatch
 
@@ -42,26 +41,11 @@ abstract class FilterExecTransformerBase(val cond: Expression, val input: SparkP
   with Logging {
 
   // Note: "metrics" is made transient to avoid sending driver-side metrics to tasks.
-  @transient override lazy val metrics =
+  @transient override lazy val metrics: Map[String, SQLMetric] =
     BackendsApiManager.getMetricsApiInstance.genFilterTransformerMetrics(sparkContext)
 
-  // Split out all the IsNotNulls from condition.
-  protected val (notNullPreds, _) = splitConjunctivePredicates(cond).partition {
-    case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet)
-    case _ => false
-  }
-
-  // The columns that will filtered out by `IsNotNull` could be considered as not nullable.
-  protected val notNullAttributes: Seq[ExprId] =
-    notNullPreds.flatMap(_.references).distinct.map(_.exprId)
-
-  override def isNoop: Boolean = getRemainingCondition == null
-
-  override def metricsUpdater(): MetricsUpdater = if (isNoop) {
-    MetricsUpdater.None
-  } else {
+  override def metricsUpdater(): MetricsUpdater =
     BackendsApiManager.getMetricsApiInstance.genFilterTransformerMetricsUpdater(metrics)
-  }
 
   def getRelNode(
       context: SubstraitContext,
@@ -84,85 +68,58 @@ abstract class FilterExecTransformerBase(val cond: Expression, val input: SparkP
     )
   }
 
-  override def output: Seq[Attribute] = {
-    child.output.map {
-      a =>
-        if (a.nullable && notNullAttributes.contains(a.exprId)) {
-          a.withNullability(false)
-        } else {
-          a
-        }
-    }
-  }
+  override def output: Seq[Attribute] = FilterExecTransformerBase.buildNewOutput(child.output, cond)
 
   override protected def orderingExpressions: Seq[SortOrder] = child.outputOrdering
 
   override protected def outputExpressions: Seq[NamedExpression] = child.output
 
-  // FIXME: Should use field "condition" to store the actual executed filter expressions.
-  // To make optimization easier (like to remove filter when it actually does nothing)
-  protected def getRemainingCondition: Expression = {
-    val scanFilters = child match {
-      // Get the filters including the manually pushed down ones.
-      case basicScanExecTransformer: BasicScanExecTransformer =>
-        basicScanExecTransformer.filterExprs()
-      // For fallback scan, we need to keep original filter.
-      case _ =>
-        Seq.empty[Expression]
-    }
-    if (scanFilters.isEmpty) {
-      cond
-    } else {
-      val remainingFilters =
-        FilterHandler.getRemainingFilters(scanFilters, splitConjunctivePredicates(cond))
-      remainingFilters.reduceLeftOption(And).orNull
-    }
-  }
-
   override protected def doValidateInternal(): ValidationResult = {
-    val remainingCondition = getRemainingCondition
-    if (remainingCondition == null) {
-      // All the filters can be pushed down and the computing of this Filter
-      // is not needed.
-      return ValidationResult.succeeded
-    }
     val substraitContext = new SubstraitContext
     val operatorId = substraitContext.nextOperatorId(this.nodeName)
     // Firstly, need to check if the Substrait plan for this operator can be successfully generated.
-    val relNode = getRelNode(
-      substraitContext,
-      remainingCondition,
-      child.output,
-      operatorId,
-      null,
-      validation = true)
+    val relNode =
+      getRelNode(substraitContext, cond, child.output, operatorId, null, validation = true)
     // Then, validate the generated plan in native engine.
     doNativeValidation(substraitContext, relNode)
   }
 
   override protected def doTransform(context: SubstraitContext): TransformContext = {
     val childCtx = child.asInstanceOf[TransformSupport].transform(context)
-    if (isNoop) {
-      // The computing for this filter is not needed.
-      // Since some columns' nullability will be removed after this filter, we need to update the
-      // outputAttributes of child context.
-      return TransformContext(output, childCtx.root)
-    }
-
     val operatorId = context.nextOperatorId(this.nodeName)
-    val remainingCondition = getRemainingCondition
-    val currRel = getRelNode(
-      context,
-      remainingCondition,
-      child.output,
-      operatorId,
-      childCtx.root,
-      validation = false)
+    val currRel =
+      getRelNode(context, cond, child.output, operatorId, childCtx.root, validation = false)
     assert(currRel != null, "Filter rel should be valid.")
     TransformContext(output, currRel)
   }
 }
 
+object FilterExecTransformerBase extends PredicateHelper {
+
+  def buildNewOutput(output: Seq[Attribute], cond: Expression): Seq[Attribute] = {
+    buildNewOutput(output, splitConjunctivePredicates(cond))
+  }
+
+  def buildNewOutput(output: Seq[Attribute], conds: Seq[Expression]): Seq[Attribute] = {
+    // Split out all the IsNotNulls from condition.
+    val (notNullPreds, _) = conds.partition {
+      case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(AttributeSet(output))
+      case _ => false
+    }
+
+    // The columns that will filter out by `IsNotNull` could be considered as not nullable.
+    val notNullAttributes: Seq[ExprId] = notNullPreds.flatMap(_.references).distinct.map(_.exprId)
+    output.map {
+      a =>
+        if (a.nullable && notNullAttributes.contains(a.exprId)) {
+          a.withNullability(false)
+        } else {
+          a
+        }
+    }
+  }
+}
+
 abstract class ProjectExecTransformerBase(val list: Seq[NamedExpression], val input: SparkPlan)
   extends UnaryTransformSupport
   with OrderPreservingNodeShim
@@ -171,7 +128,7 @@ abstract class ProjectExecTransformerBase(val list: Seq[NamedExpression], val in
   with Logging {
 
   // Note: "metrics" is made transient to avoid sending driver-side metrics to tasks.
-  @transient override lazy val metrics =
+  @transient override lazy val metrics: Map[String, SQLMetric] =
     BackendsApiManager.getMetricsApiInstance.genProjectTransformerMetrics(sparkContext)
 
   override protected def doValidateInternal(): ValidationResult = {
@@ -281,37 +238,11 @@ case class ColumnarUnionExec(children: Seq[SparkPlan]) extends ValidatablePlan {
 }
 
 /**
- * Contains functions for the comparision and separation of the filter conditions in Scan and
- * Filter. Contains the function to manually push down the conditions into Scan.
+ * Contains functions for the comparison and separation of the filter conditions in Scan and Filter.
+ * Contains the function to manually push down the conditions into Scan.
  */
 object FilterHandler extends PredicateHelper {
 
-  /**
-   * Get the original filter conditions in Scan for the comparison with those in Filter.
-   *
-   * @param plan
-   *   : the Spark plan
-   * @return
-   *   If the plan is FileSourceScanExec or BatchScanExec, return the filter conditions in it.
-   *   Otherwise, return empty sequence.
-   */
-  def getScanFilters(plan: SparkPlan): Seq[Expression] = {
-    plan match {
-      case fileSourceScan: FileSourceScanExec =>
-        fileSourceScan.dataFilters
-      case batchScan: BatchScanExec =>
-        batchScan.scan match {
-          case scan: FileScan =>
-            scan.dataFilters
-          case _ =>
-            throw new GlutenNotSupportException(
-              s"${batchScan.scan.getClass.toString} is not supported")
-        }
-      case _ =>
-        Seq()
-    }
-  }
-
   /**
    * Compare the semantics of the filter conditions pushed down to Scan and in the Filter.
    *
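
The nullability tightening that used to live inside FilterExecTransformerBase.output is now exposed as the standalone FilterExecTransformerBase.buildNewOutput helper added above. A minimal sketch of its effect follows; the attribute names and types are illustrative, not taken from the commit.

import org.apache.gluten.execution.FilterExecTransformerBase
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.IntegerType

// Two nullable columns; the condition proves only `a` to be non-null.
val a = AttributeReference("a", IntegerType, nullable = true)()
val b = AttributeReference("b", IntegerType, nullable = true)()
val cond = And(IsNotNull(a), GreaterThan(a, Literal(1)))

// IsNotNull(a) is split out of the conjunction, so `a` is reported as non-nullable,
// while `b` keeps its original nullability.
val newOutput = FilterExecTransformerBase.buildNewOutput(Seq(a, b), cond)
// expected: newOutput.map(_.nullable) == Seq(false, true)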

gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala

Lines changed: 2 additions & 0 deletions
@@ -40,6 +40,8 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource
   /** Returns the filters that can be pushed down to native file scan */
   def filterExprs(): Seq[Expression]
 
+  def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer
+
   def getMetadataColumns(): Seq[AttributeReference]
 
   /** This can be used to report FileFormat for a file based scan operator. */
gluten-substrait/src/main/scala/org/apache/gluten/execution/BatchScanExecTransformer.scala

Lines changed: 4 additions & 0 deletions
@@ -68,6 +68,10 @@ case class BatchScanExecTransformer(
         output)
     )
   }
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer = {
+    copy(output = newOutput.map(_.asInstanceOf[AttributeReference]))
+  }
 }
 
 abstract class BatchScanExecTransformerBase(

gluten-substrait/src/main/scala/org/apache/gluten/execution/FileSourceScanExecTransformer.scala

Lines changed: 3 additions & 0 deletions
@@ -74,6 +74,9 @@ case class FileSourceScanExecTransformer(
       disableBucketedScan
     )
   }
+
+  override def withNewOutput(newOutput: Seq[Attribute]): BasicScanExecTransformer =
+    copy(output = newOutput)
 }
 
 abstract class FileSourceScanExecTransformerBase(
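
Taken together, the changes suggest a rule that drops a FilterExecTransformerBase whose entire condition has already been pushed into the child scan, with the scan taking over the tightened (non-nullable) output via the new withNewOutput API. The rule itself is in one of the changed files not shown in this excerpt; the sketch below is only an assumption about how the two new APIs fit together, and the rule name and the fullyPushedDown helper are hypothetical.

import org.apache.gluten.execution.{BasicScanExecTransformer, FilterExecTransformerBase}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlan

// Hypothetical sketch, not the commit's actual rule.
object RemovePushedDownFilterSketch extends Rule[SparkPlan] {
  override def apply(plan: SparkPlan): SparkPlan = plan.transformUp {
    case filter: FilterExecTransformerBase =>
      filter.child match {
        case scan: BasicScanExecTransformer if fullyPushedDown(filter, scan) =>
          // Replace Filter(Scan) with a scan whose output carries the
          // filter-derived non-null attributes.
          scan.withNewOutput(
            FilterExecTransformerBase.buildNewOutput(scan.output, filter.cond))
        case _ => filter
      }
  }

  // Placeholder: a real implementation must prove that every conjunct of filter.cond
  // is covered by the filters pushed down into the scan (e.g. via scan.filterExprs()).
  private def fullyPushedDown(
      filter: FilterExecTransformerBase,
      scan: BasicScanExecTransformer): Boolean = false
}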
