Skip to content

Commit a76e4fa

Browse files
mikhailnik-dbcloud-fan
authored andcommitted
Revert [SPARK-54758][SQL] Fix generator resolution order in Project
### What changes were proposed in this pull request? Reverting my previous PR, because it broke LCA resolution #53527 ### Why are the changes needed? There are 2 problems with LCA: 1. Current implementation breaks a number of queries because it creates a circular dependancy: `Generator` waits for `UnresolvedFunction` and `UnresolvedFunction` waits for generator's LCA which waits `Generator` resolution. I have an idea how to do it properly, but first I want to revert this. 2. A more fundamental problem is that currently generators' LCA could be resolved from right to left, e.g., `SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr` works. This is quite bizarre, but I will need to break this behavior if I want to enforce left-to-right resolution. ### Does this PR introduce _any_ user-facing change? 'No' ### How was this patch tested? Add new tests to golden files ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Cursor 2.2.14 Closes #53562 from mikhailnik-db/revert-generator-order-fix. Authored-by: Mikhail Nikoliukin <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 14cc174 commit a76e4fa

File tree

4 files changed

+49
-19
lines changed

4 files changed

+49
-19
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3065,13 +3065,6 @@ class Analyzer(
30653065
})
30663066
}
30673067

3068-
private def hasUnresolvedGeneratorOrFunction(exprs: Seq[Expression]): Boolean = {
3069-
exprs.exists(_.exists {
3070-
case _: UnresolvedFunction | _: UnresolvedGenerator => true
3071-
case _ => false
3072-
})
3073-
}
3074-
30753068
private def trimAlias(expr: NamedExpression): Expression = expr match {
30763069
case UnresolvedAlias(child, _) => child
30773070
case Alias(child, _) => child
@@ -3163,21 +3156,15 @@ class Analyzer(
31633156
p
31643157

31653158
// The star will be expanded differently if we insert `Generate` under `Project` too early.
3166-
// We also wait for all functions and generators to be resolved to ensure left-to-right
3167-
// generator ordering.
3168-
case p @ Project(projectList, child)
3169-
if !projectList.exists(_.exists(_.isInstanceOf[Star])) &&
3170-
!hasUnresolvedGeneratorOrFunction(projectList) =>
3171-
var hasSeenGenerator = false
3159+
case p @ Project(projectList, child) if !projectList.exists(_.exists(_.isInstanceOf[Star])) =>
31723160
val (resolvedGenerator, newProjectList) = projectList
31733161
.map(trimNonTopLevelAliases)
31743162
.foldLeft((None: Option[Generate], Nil: Seq[NamedExpression])) { (res, e) =>
31753163
e match {
31763164
// If there are more than one generator, we only rewrite the first one and wait for
31773165
// the next analyzer iteration to rewrite the next one.
3178-
case AliasedGenerator(generator, names, outer) if !hasSeenGenerator &&
3166+
case AliasedGenerator(generator, names, outer) if res._1.isEmpty &&
31793167
generator.childrenResolved =>
3180-
hasSeenGenerator = true
31813168
val g = Generate(
31823169
generator,
31833170
unrequiredChildIndex = Nil,
@@ -3187,7 +3174,6 @@ class Analyzer(
31873174
child)
31883175
(Some(g), res._2 ++ g.nullableOutput)
31893176
case other =>
3190-
hasSeenGenerator |= hasGenerator(other)
31913177
(res._1, res._2 :+ other)
31923178
}
31933179
}

sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ Project [col#x, col#x]
3838
SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
3939
-- !query analysis
4040
Project [col#x, col#x]
41-
+- Generate explode(array(10, 20)), false, [col#x]
42-
+- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 as double))), false, [col#x]
41+
+- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 as double))), false, [col#x]
42+
+- Generate explode(array(10, 20)), false, [col#x]
4343
+- OneRowRelation
4444

4545

@@ -460,3 +460,21 @@ Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FO
460460
+- Window [count(1) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#xL]
461461
+- Aggregate [count(1) AS count(1)#xL]
462462
+- OneRowRelation
463+
464+
465+
-- !query
466+
SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col
467+
-- !query analysis
468+
Project [arr#x, col#x]
469+
+- Generate explode(arr#x), false, [col#x]
470+
+- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
471+
+- OneRowRelation
472+
473+
474+
-- !query
475+
SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr
476+
-- !query analysis
477+
Project [col#x, arr#x]
478+
+- Generate explode(arr#x), false, [col#x]
479+
+- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
480+
+- OneRowRelation

sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ SELECT 1 + explode(array(1, 2, 3));
77
-- multiple generators should work
88
SELECT explode(array(0, 1, 2)), explode(array(10, 20));
99

10-
-- multiple generators are processed in left-to-right order regardless of internal rule ordering
10+
-- multiple generators' order is not fixed and depends on rule ordering
1111
SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20));
1212

1313
-- multiple generators in aggregate should fail
@@ -127,3 +127,9 @@ SELECT explode(array(1, 2, 3)) as col, count(*) OVER ();
127127

128128
-- generator with window function and aggregate together resolves in order Aggregate -> Window -> Generator
129129
SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*);
130+
131+
-- generator LCA left-to-right should work
132+
SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col;
133+
134+
-- generator LCA right-to-left should work
135+
SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr;

sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,3 +460,23 @@ struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOL
460460
1 1 1
461461
2 1 1
462462
3 1 1
463+
464+
465+
-- !query
466+
SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col
467+
-- !query schema
468+
struct<arr:array<int>,col:int>
469+
-- !query output
470+
[0] 0
471+
[1] 1
472+
[2] 2
473+
474+
475+
-- !query
476+
SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr
477+
-- !query schema
478+
struct<col:int,arr:array<int>>
479+
-- !query output
480+
0 [0]
481+
1 [1]
482+
2 [2]

0 commit comments

Comments
 (0)