Skip to content

Commit 0fbd34c

Browse files
committed
[SPARK-50377][SQL] Allow to evaluate foldable RuntimeReplaceable
### What changes were proposed in this pull request? This is to fix a regression caused by #47143 . The problem is, in some places, we want to get a constant from a foldable expression before the query execution starts. #47143 brings two problems: 1. `UnaryPositive` is no longer a `UnaryExpression`, which means it's not foldable anymore even if its child is foldable. 2. `UnaryPositive` is no longer evaluable. `Lag` is such a place. It may evaluate the `inputOffset` parameter eagerly. `lag(..., +1)` no longer works after #47143 . Instead of fixing `Lag`, this PR makes two changes and hopefully we can avoid all similar problems: 1. Make `UnaryPositive` extend `UnaryExpression` again. We need follow-up PRs to check other `RuntimeReplaceable` expressions and see if they should extend `UnaryExpression` or `BinaryExpression`, etc. 2. Implement `RuntimeReplaceable#eval` so that we can evaluate folding `RuntimeReplaceable` eagerly when needed. ### Why are the changes needed? Fix the regression on `lag` and avoid similar issues in the future. ### Does this PR introduce _any_ user-facing change? No, the regression is not released yet. ### How was this patch tested? new test ### Was this patch authored or co-authored using generative AI tooling? no Closes #48912 from cloud-fan/replace. Lead-authored-by: Wenchen Fan <[email protected]> Co-authored-by: Wenchen Fan <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent d9781d9 commit 0fbd34c

File tree

5 files changed

+26
-23
lines changed

5 files changed

+26
-23
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,8 +440,12 @@ trait RuntimeReplaceable extends Expression {
440440
// are semantically equal.
441441
override lazy val canonicalized: Expression = replacement.canonicalized
442442

443-
final override def eval(input: InternalRow = null): Any =
444-
throw QueryExecutionErrors.cannotEvaluateExpressionError(this)
443+
final override def eval(input: InternalRow = null): Any = {
444+
// For convenience, we allow to evaluate `RuntimeReplaceable` expressions, in case we need to
445+
// get a constant from foldable expression before the query execution starts.
446+
assert(input == null)
447+
replacement.eval()
448+
}
445449
final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
446450
throw QueryExecutionErrors.cannotGenerateCodeForExpressionError(this)
447451
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,7 @@ case class UnaryMinus(
115115
since = "1.5.0",
116116
group = "math_funcs")
117117
case class UnaryPositive(child: Expression)
118-
extends RuntimeReplaceable with ImplicitCastInputTypes {
119-
override def nullIntolerant: Boolean = true
118+
extends UnaryExpression with RuntimeReplaceable with ImplicitCastInputTypes {
120119

121120
override def prettyName: String = "positive"
122121

@@ -128,11 +127,8 @@ case class UnaryPositive(child: Expression)
128127

129128
override lazy val replacement: Expression = child
130129

131-
override protected def withNewChildrenInternal(
132-
newChildren: IndexedSeq[Expression]): UnaryPositive =
133-
copy(newChildren.head)
134-
135-
override def children: Seq[Expression] = child :: Nil
130+
override protected def withNewChildInternal(newChild: Expression): Expression =
131+
copy(child = newChild)
136132
}
137133

138134
/**

sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,7 @@ SELECT
995995
lag(v, 1) IGNORE NULLS OVER w lag_1,
996996
lag(v, 2) IGNORE NULLS OVER w lag_2,
997997
lag(v, 3) IGNORE NULLS OVER w lag_3,
998+
lag(v, +3) IGNORE NULLS OVER w lag_plus_3,
998999
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
9991000
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
10001001
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
@@ -1007,9 +1008,9 @@ WINDOW w AS (ORDER BY id)
10071008
ORDER BY id
10081009
-- !query analysis
10091010
Sort [id#x ASC NULLS FIRST], true
1010-
+- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x]
1011-
+- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, ... 7 more fields]
1012-
+- Window [lead(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 0, 0)) AS lead_0#x, lead(v#x, 1, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 1, 1)) AS lead_1#x, lead(v#x, 2, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 2, 2)) AS lead_2#x, lead(v#x, 3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 3, 3)) AS lead_3#x, lag(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 0, 0)) AS lag_0#x, lag(v#x, -1, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS lag_1#x, lag(v#x, -2, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS lag_2#x, lag(v#x, -3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -3, -3)) AS lag_3#x, nth_value(v#x, 1, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_1#x, nth_value(v#x, 2, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_2#x, nth_value(v#x, 3, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_3#x, first(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value#x, any_value(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, last(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x], [id#x ASC NULLS FIRST]
1011+
+- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x]
1012+
+- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, ... 9 more fields]
1013+
+- Window [lead(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 0, 0)) AS lead_0#x, lead(v#x, 1, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 1, 1)) AS lead_1#x, lead(v#x, 2, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 2, 2)) AS lead_2#x, lead(v#x, 3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 3, 3)) AS lead_3#x, lag(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 0, 0)) AS lag_0#x, lag(v#x, -1, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS lag_1#x, lag(v#x, -2, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS lag_2#x, lag(v#x, -3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -3, -3)) AS lag_3#x, lag(v#x, -3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -3, -3)) AS lag_plus_3#x, nth_value(v#x, 1, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_1#x, nth_value(v#x, 2, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_2#x, nth_value(v#x, 3, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_3#x, first(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value#x, any_value(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, last(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x], [id#x ASC NULLS FIRST]
10131014
+- Project [content#x, id#x, v#x]
10141015
+- SubqueryAlias test_ignore_null
10151016
+- View (`test_ignore_null`, [content#x, id#x, v#x])

sql/core/src/test/resources/sql-tests/inputs/window.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ SELECT
327327
lag(v, 1) IGNORE NULLS OVER w lag_1,
328328
lag(v, 2) IGNORE NULLS OVER w lag_2,
329329
lag(v, 3) IGNORE NULLS OVER w lag_3,
330+
lag(v, +3) IGNORE NULLS OVER w lag_plus_3,
330331
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
331332
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
332333
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,

sql/core/src/test/resources/sql-tests/results/window.sql.out

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,7 @@ SELECT
10601060
lag(v, 1) IGNORE NULLS OVER w lag_1,
10611061
lag(v, 2) IGNORE NULLS OVER w lag_2,
10621062
lag(v, 3) IGNORE NULLS OVER w lag_3,
1063+
lag(v, +3) IGNORE NULLS OVER w lag_plus_3,
10631064
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
10641065
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
10651066
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
@@ -1071,17 +1072,17 @@ FROM
10711072
WINDOW w AS (ORDER BY id)
10721073
ORDER BY id
10731074
-- !query schema
1074-
struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string>
1075-
-- !query output
1076-
a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
1077-
a 1 x x y z v x NULL NULL NULL x NULL NULL x x x
1078-
b 2 NULL NULL y z v NULL x NULL NULL x NULL NULL x x x
1079-
c 3 NULL NULL y z v NULL x NULL NULL x NULL NULL x x x
1080-
a 4 y y z v NULL y x NULL NULL x y NULL x x y
1081-
b 5 NULL NULL z v NULL NULL y x NULL x y NULL x x y
1082-
a 6 z z v NULL NULL z y x NULL x y z x x z
1083-
a 7 v v NULL NULL NULL v z y x x y z x x v
1084-
a 8 NULL NULL NULL NULL NULL NULL v z y x y z x x v
1075+
struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,lag_plus_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string>
1076+
-- !query output
1077+
a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
1078+
a 1 x x y z v x NULL NULL NULL NULL x NULL NULL x x x
1079+
b 2 NULL NULL y z v NULL x NULL NULL NULL x NULL NULL x x x
1080+
c 3 NULL NULL y z v NULL x NULL NULL NULL x NULL NULL x x x
1081+
a 4 y y z v NULL y x NULL NULL NULL x y NULL x x y
1082+
b 5 NULL NULL z v NULL NULL y x NULL NULL x y NULL x x y
1083+
a 6 z z v NULL NULL z y x NULL NULL x y z x x z
1084+
a 7 v v NULL NULL NULL v z y x x x y z x x v
1085+
a 8 NULL NULL NULL NULL NULL NULL v z y y x y z x x v
10851086

10861087

10871088
-- !query

0 commit comments

Comments
 (0)