@@ -17,7 +17,6 @@
 package org.apache.gluten.execution

 import org.apache.gluten.backendsapi.BackendsApiManager
-import org.apache.gluten.exception.GlutenNotSupportException
 import org.apache.gluten.expression.{ExpressionConverter, ExpressionTransformer}
 import org.apache.gluten.extension.columnar.transition.Convention
 import org.apache.gluten.metrics.MetricsUpdater
@@ -26,9 +25,9 @@ import org.apache.gluten.substrait.rel.{RelBuilder, RelNode}

 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.{PredicateHelper, _}
 import org.apache.spark.sql.execution._
-import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan}
+import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.utils.StructTypeFWD
 import org.apache.spark.sql.vectorized.ColumnarBatch

@@ -42,26 +41,11 @@ abstract class FilterExecTransformerBase(val cond: Expression, val input: SparkPlan)
   with Logging {

   // Note: "metrics" is made transient to avoid sending driver-side metrics to tasks.
-  @transient override lazy val metrics =
+  @transient override lazy val metrics: Map[String, SQLMetric] =
     BackendsApiManager.getMetricsApiInstance.genFilterTransformerMetrics(sparkContext)

-  // Split out all the IsNotNulls from condition.
-  protected val (notNullPreds, _) = splitConjunctivePredicates(cond).partition {
-    case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet)
-    case _ => false
-  }
-
-  // The columns that will filtered out by `IsNotNull` could be considered as not nullable.
-  protected val notNullAttributes: Seq[ExprId] =
-    notNullPreds.flatMap(_.references).distinct.map(_.exprId)
-
-  override def isNoop: Boolean = getRemainingCondition == null
-
-  override def metricsUpdater(): MetricsUpdater = if (isNoop) {
-    MetricsUpdater.None
-  } else {
+  override def metricsUpdater(): MetricsUpdater =
     BackendsApiManager.getMetricsApiInstance.genFilterTransformerMetricsUpdater(metrics)
-  }

   def getRelNode(
       context: SubstraitContext,
@@ -84,85 +68,58 @@ abstract class FilterExecTransformerBase(val cond: Expression, val input: SparkPlan)
     )
   }

-  override def output: Seq[Attribute] = {
-    child.output.map {
-      a =>
-        if (a.nullable && notNullAttributes.contains(a.exprId)) {
-          a.withNullability(false)
-        } else {
-          a
-        }
-    }
-  }
+  override def output: Seq[Attribute] = FilterExecTransformerBase.buildNewOutput(child.output, cond)

   override protected def orderingExpressions: Seq[SortOrder] = child.outputOrdering

   override protected def outputExpressions: Seq[NamedExpression] = child.output

-  // FIXME: Should use field "condition" to store the actual executed filter expressions.
-  // To make optimization easier (like to remove filter when it actually does nothing)
-  protected def getRemainingCondition: Expression = {
-    val scanFilters = child match {
-      // Get the filters including the manually pushed down ones.
-      case basicScanExecTransformer: BasicScanExecTransformer =>
-        basicScanExecTransformer.filterExprs()
-      // For fallback scan, we need to keep original filter.
-      case _ =>
-        Seq.empty[Expression]
-    }
-    if (scanFilters.isEmpty) {
-      cond
-    } else {
-      val remainingFilters =
-        FilterHandler.getRemainingFilters(scanFilters, splitConjunctivePredicates(cond))
-      remainingFilters.reduceLeftOption(And).orNull
-    }
-  }
-
   override protected def doValidateInternal(): ValidationResult = {
-    val remainingCondition = getRemainingCondition
-    if (remainingCondition == null) {
-      // All the filters can be pushed down and the computing of this Filter
-      // is not needed.
-      return ValidationResult.succeeded
-    }
     val substraitContext = new SubstraitContext
     val operatorId = substraitContext.nextOperatorId(this.nodeName)
     // Firstly, need to check if the Substrait plan for this operator can be successfully generated.
-    val relNode = getRelNode(
-      substraitContext,
-      remainingCondition,
-      child.output,
-      operatorId,
-      null,
-      validation = true)
+    val relNode =
+      getRelNode(substraitContext, cond, child.output, operatorId, null, validation = true)
     // Then, validate the generated plan in native engine.
     doNativeValidation(substraitContext, relNode)
   }

   override protected def doTransform(context: SubstraitContext): TransformContext = {
     val childCtx = child.asInstanceOf[TransformSupport].transform(context)
-    if (isNoop) {
-      // The computing for this filter is not needed.
-      // Since some columns' nullability will be removed after this filter, we need to update the
-      // outputAttributes of child context.
-      return TransformContext(output, childCtx.root)
-    }
-
     val operatorId = context.nextOperatorId(this.nodeName)
-    val remainingCondition = getRemainingCondition
-    val currRel = getRelNode(
-      context,
-      remainingCondition,
-      child.output,
-      operatorId,
-      childCtx.root,
-      validation = false)
+    val currRel =
+      getRelNode(context, cond, child.output, operatorId, childCtx.root, validation = false)
     assert(currRel != null, "Filter rel should be valid.")
     TransformContext(output, currRel)
   }
 }

+object FilterExecTransformerBase extends PredicateHelper {
+
+  def buildNewOutput(output: Seq[Attribute], cond: Expression): Seq[Attribute] = {
+    buildNewOutput(output, splitConjunctivePredicates(cond))
+  }
+
+  def buildNewOutput(output: Seq[Attribute], conds: Seq[Expression]): Seq[Attribute] = {
+    // Split out all the IsNotNulls from condition.
+    val (notNullPreds, _) = conds.partition {
+      case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(AttributeSet(output))
+      case _ => false
+    }
+
+    // The columns that will be filtered by `IsNotNull` could be considered as not nullable.
+    val notNullAttributes: Seq[ExprId] = notNullPreds.flatMap(_.references).distinct.map(_.exprId)
+    output.map {
+      a =>
+        if (a.nullable && notNullAttributes.contains(a.exprId)) {
+          a.withNullability(false)
+        } else {
+          a
+        }
+    }
+  }
+}
+
 abstract class ProjectExecTransformerBase(val list: Seq[NamedExpression], val input: SparkPlan)
   extends UnaryTransformSupport
   with OrderPreservingNodeShim
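Note on the new companion object above: the extracted helper preserves the original semantics — only conjuncts of the form IsNotNull(e), where e is null-intolerant and bound to the given output, can tighten nullability. A minimal illustrative sketch of the behavior (the attributes and surrounding scaffolding are hypothetical, not part of this patch):

    import org.apache.gluten.execution.FilterExecTransformerBase
    import org.apache.spark.sql.catalyst.expressions._
    import org.apache.spark.sql.types.IntegerType

    val a = AttributeReference("a", IntegerType, nullable = true)()
    val b = AttributeReference("b", IntegerType, nullable = true)()
    val cond = And(IsNotNull(a), GreaterThan(b, Literal(0)))

    // Only `a` is guarded by an explicit IsNotNull, so only `a` drops its nullability;
    // `b` stays nullable even though `b > 0` also rejects null rows at runtime.
    val out = FilterExecTransformerBase.buildNewOutput(Seq(a, b), cond)
    assert(out.map(_.nullable) == Seq(false, true))
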
@@ -171,7 +128,7 @@ abstract class ProjectExecTransformerBase(val list: Seq[NamedExpression], val input: SparkPlan)
   with Logging {

   // Note: "metrics" is made transient to avoid sending driver-side metrics to tasks.
-  @transient override lazy val metrics =
+  @transient override lazy val metrics: Map[String, SQLMetric] =
     BackendsApiManager.getMetricsApiInstance.genProjectTransformerMetrics(sparkContext)

   override protected def doValidateInternal(): ValidationResult = {
@@ -281,37 +238,11 @@ case class ColumnarUnionExec(children: Seq[SparkPlan]) extends ValidatablePlan {
 }

 /**
- * Contains functions for the comparision and separation of the filter conditions in Scan and
- * Filter. Contains the function to manually push down the conditions into Scan.
+ * Contains functions for the comparison and separation of the filter conditions in Scan and Filter.
+ * Contains the function to manually push down the conditions into Scan.
  */
 object FilterHandler extends PredicateHelper {

-  /**
-   * Get the original filter conditions in Scan for the comparison with those in Filter.
-   *
-   * @param plan
-   *   : the Spark plan
-   * @return
-   *   If the plan is FileSourceScanExec or BatchScanExec, return the filter conditions in it.
-   *   Otherwise, return empty sequence.
-   */
-  def getScanFilters(plan: SparkPlan): Seq[Expression] = {
-    plan match {
-      case fileSourceScan: FileSourceScanExec =>
-        fileSourceScan.dataFilters
-      case batchScan: BatchScanExec =>
-        batchScan.scan match {
-          case scan: FileScan =>
-            scan.dataFilters
-          case _ =>
-            throw new GlutenNotSupportException(
-              s"${batchScan.scan.getClass.toString} is not supported")
-        }
-      case _ =>
-        Seq()
-    }
-  }
-
   /**
    * Compare the semantics of the filter conditions pushed down to Scan and in the Filter.
    * ...
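The comparison this doc comment introduces (retained as getRemainingFilters, which the reworked doValidateInternal no longer calls) can be sketched with catalyst's ExpressionSet, which deduplicates semantically equal expressions. An illustrative sketch of the intended semantics, not necessarily the exact body of the method:

    import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionSet}

    // Keep only the Filter conjuncts not already covered (semantically) by the
    // conjuncts pushed down to the Scan.
    def getRemainingFilters(scanFilters: Seq[Expression], filters: Seq[Expression]): Seq[Expression] =
      (ExpressionSet(filters) -- ExpressionSet(scanFilters)).toSeq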