Skip to content

Commit 7a0bf9e

Browse files
pavle-martinovic_data and cloud-fan committed
[SPARK-53127][SQL][FOLLOWUP] Clean up golden files and add comments for LIMIT ALL in rCTEs
### What changes were proposed in this pull request? - Clean up golden files to remove isUnlimitedRecursion from stringArgs in the case it is false, as most golden files don't need it. - Add comments to explain the need to handle LIMIT ALL in a special way for recursive CTEs. ### Why are the changes needed? Code clean up. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Regenerated golden files. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #52468 from Pajaraja/pavle-martinovic_data/LimitAllCommentsAndRefactor. Lead-authored-by: pavle-martinovic_data <[email protected]> Co-authored-by: Wenchen Fan <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 6cdc62e commit 7a0bf9e

28 files changed

+340
-325
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyLimitAll.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ package org.apache.spark.sql.catalyst.analysis
2020
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, BaseEvalPython, CTERelationRef, Filter, Join, LimitAll, LogicalPlan, Offset, Project, SubqueryAlias, Union, Window}
2121
import org.apache.spark.sql.catalyst.rules.Rule
2222

23+
/**
24+
* Limit All is usually a no-op in Spark, used for compatibility with other database
25+
* systems. However, in the case of recursive CTEs there is a default value (controlled by a flag)
26+
* of the maximum number of rows that a recursive CTE may return, which can be overridden by a Limit
27+
* operator above the UnionLoop node. Since this is a case where a Limit operator actually increases
28+
* the number of rows a node should return, Limit All stops being a no-op node semantically, and
29+
* should be used to enable unlimited looping in recursive CTEs.
30+
*/
2331
object ApplyLimitAll extends Rule[LogicalPlan] {
2432
private def applyLimitAllToPlan(plan: LogicalPlan, isInLimitAll: Boolean = false): LogicalPlan = {
2533
plan match {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/cteOperators.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ object CTERelationDef {
186186
* @param statsOpt The optional statistics inferred from the corresponding CTE
187187
* definition.
188188
* @param recursive If this is a recursive reference.
189+
* @param isUnlimitedRecursion If the node is a (non-recursive) reference to a recursive CTE that
190+
* should be executed without a limit to the number of rows it returns.
189191
*/
190192
case class CTERelationRef(
191193
cteId: Long,
@@ -201,6 +203,11 @@ case class CTERelationRef(
201203

202204
override lazy val resolved: Boolean = _resolved
203205

206+
override def stringArgs: Iterator[Any] = {
207+
// Drop the trailing isUnlimitedRecursion argument when it is false, so golden files omit it.
208+
if (isUnlimitedRecursion) super.stringArgs else super.stringArgs.toArray.init.iterator
209+
}
210+
204211
override def newInstance(): LogicalPlan = {
205212
// CTERelationRef inherits the output attributes from a query, which may contain duplicated
206213
// attributes, for queries like `SELECT a, a FROM t`. It's important to keep the duplicated

sql/core/src/test/resources/sql-tests/analyzer-results/collations-aliases.sql.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ WithCTE
183183
: +- Relation spark_catalog.default.t1[s#x,utf8_binary#x,utf8_lcase#x] parquet
184184
+- Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
185185
+- SubqueryAlias cte
186-
+- CTERelationRef xxxx, true, [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x], false, false, false
186+
+- CTERelationRef xxxx, true, [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x], false, false
187187

188188

189189
-- !query
@@ -232,7 +232,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
232232
: +- LocalLimit 1
233233
: +- Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
234234
: +- SubqueryAlias cte
235-
: +- CTERelationRef xxxx, true, [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x], false, false, false
235+
: +- CTERelationRef xxxx, true, [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x], false, false
236236
+- OneRowRelation
237237

238238

sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`cte_tbl`, ErrorI
1010
: +- OneRowRelation
1111
+- Project [col#x]
1212
+- SubqueryAlias s
13-
+- CTERelationRef xxxx, true, [col#x], false, false, 1, false
13+
+- CTERelationRef xxxx, true, [col#x], false, false, 1
1414

1515

1616
-- !query
@@ -32,7 +32,7 @@ CreateViewCommand `cte_view`, WITH s AS (SELECT 42 AS col) SELECT * FROM s, fals
3232
: +- OneRowRelation
3333
+- Project [col#x]
3434
+- SubqueryAlias s
35-
+- CTERelationRef xxxx, true, [col#x], false, false, 1, false
35+
+- CTERelationRef xxxx, true, [col#x], false, false, 1
3636

3737

3838
-- !query
@@ -49,7 +49,7 @@ Project [col#x]
4949
: +- OneRowRelation
5050
+- Project [col#x]
5151
+- SubqueryAlias s
52-
+- CTERelationRef xxxx, true, [col#x], false, false, 1, false
52+
+- CTERelationRef xxxx, true, [col#x], false, false, 1
5353

5454

5555
-- !query
@@ -65,7 +65,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d
6565
: +- OneRowRelation
6666
+- Project [col#x]
6767
+- SubqueryAlias S
68-
+- CTERelationRef xxxx, true, [col#x], false, false, 1, false
68+
+- CTERelationRef xxxx, true, [col#x], false, false, 1
6969

7070

7171
-- !query
@@ -88,7 +88,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d
8888
: +- OneRowRelation
8989
+- Project [col#x]
9090
+- SubqueryAlias s
91-
+- CTERelationRef xxxx, true, [col#x], false, false, 1, false
91+
+- CTERelationRef xxxx, true, [col#x], false, false, 1
9292

9393

9494
-- !query

sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ WithCTE
1515
: +- SubqueryAlias t
1616
: +- Project [1#x]
1717
: +- SubqueryAlias t2
18-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
18+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
1919
+- Project [1#x]
2020
+- SubqueryAlias t
21-
+- CTERelationRef xxxx, true, [1#x], false, false, 1, false
21+
+- CTERelationRef xxxx, true, [1#x], false, false, 1
2222

2323

2424
-- !query
@@ -37,7 +37,7 @@ Aggregate [max(c#x) AS max(c)#x]
3737
: +- OneRowRelation
3838
+- Project [c#x]
3939
+- SubqueryAlias t
40-
+- CTERelationRef xxxx, true, [c#x], false, false, 1, false
40+
+- CTERelationRef xxxx, true, [c#x], false, false, 1
4141

4242

4343
-- !query
@@ -54,7 +54,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
5454
: : +- OneRowRelation
5555
: +- Project [1#x]
5656
: +- SubqueryAlias t
57-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
57+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
5858
+- OneRowRelation
5959

6060

@@ -140,10 +140,10 @@ WithCTE
140140
: +- SubqueryAlias t2
141141
: +- Project [2#x]
142142
: +- SubqueryAlias t
143-
: +- CTERelationRef xxxx, true, [2#x], false, false, 1, false
143+
: +- CTERelationRef xxxx, true, [2#x], false, false, 1
144144
+- Project [2#x]
145145
+- SubqueryAlias t2
146-
+- CTERelationRef xxxx, true, [2#x], false, false, 1, false
146+
+- CTERelationRef xxxx, true, [2#x], false, false, 1
147147

148148

149149
-- !query
@@ -178,11 +178,11 @@ WithCTE
178178
: : : +- OneRowRelation
179179
: : +- Project [c#x]
180180
: : +- SubqueryAlias t
181-
: : +- CTERelationRef xxxx, true, [c#x], false, false, 1, false
181+
: : +- CTERelationRef xxxx, true, [c#x], false, false, 1
182182
: +- OneRowRelation
183183
+- Project [scalarsubquery()#x]
184184
+- SubqueryAlias t2
185-
+- CTERelationRef xxxx, true, [scalarsubquery()#x], false, false, 1, false
185+
+- CTERelationRef xxxx, true, [scalarsubquery()#x], false, false, 1
186186

187187

188188
-- !query
@@ -215,15 +215,15 @@ WithCTE
215215
: +- SubqueryAlias t2
216216
: +- Project [3#x]
217217
: +- SubqueryAlias t
218-
: +- CTERelationRef xxxx, true, [3#x], false, false, 1, false
218+
: +- CTERelationRef xxxx, true, [3#x], false, false, 1
219219
:- CTERelationDef xxxx, false
220220
: +- SubqueryAlias t2
221221
: +- Project [3#x]
222222
: +- SubqueryAlias t2
223-
: +- CTERelationRef xxxx, true, [3#x], false, false, 1, false
223+
: +- CTERelationRef xxxx, true, [3#x], false, false, 1
224224
+- Project [3#x]
225225
+- SubqueryAlias t2
226-
+- CTERelationRef xxxx, true, [3#x], false, false, 1, false
226+
+- CTERelationRef xxxx, true, [3#x], false, false, 1
227227

228228

229229
-- !query
@@ -248,7 +248,7 @@ WithCTE
248248
+- SubqueryAlias __auto_generated_subquery_name
249249
+- Project [c#x]
250250
+- SubqueryAlias t
251-
+- CTERelationRef xxxx, true, [c#x], false, false, 1, false
251+
+- CTERelationRef xxxx, true, [c#x], false, false, 1
252252

253253

254254
-- !query
@@ -277,7 +277,7 @@ WithCTE
277277
+- SubqueryAlias __auto_generated_subquery_name
278278
+- Project [c#x]
279279
+- SubqueryAlias t
280-
+- CTERelationRef xxxx, true, [c#x], false, false, 1, false
280+
+- CTERelationRef xxxx, true, [c#x], false, false, 1
281281

282282

283283
-- !query
@@ -312,7 +312,7 @@ WithCTE
312312
+- SubqueryAlias __auto_generated_subquery_name
313313
+- Project [c#x]
314314
+- SubqueryAlias t
315-
+- CTERelationRef xxxx, true, [c#x], false, false, 1, false
315+
+- CTERelationRef xxxx, true, [c#x], false, false, 1
316316

317317

318318
-- !query
@@ -335,7 +335,7 @@ WithCTE
335335
: : +- OneRowRelation
336336
: +- Project [2#x]
337337
: +- SubqueryAlias t
338-
: +- CTERelationRef xxxx, true, [2#x], false, false, 1, false
338+
: +- CTERelationRef xxxx, true, [2#x], false, false, 1
339339
+- OneRowRelation
340340

341341

@@ -362,7 +362,7 @@ WithCTE
362362
: : : +- OneRowRelation
363363
: : +- Project [2#x]
364364
: : +- SubqueryAlias t
365-
: : +- CTERelationRef xxxx, true, [2#x], false, false, 1, false
365+
: : +- CTERelationRef xxxx, true, [2#x], false, false, 1
366366
: +- OneRowRelation
367367
+- OneRowRelation
368368

@@ -396,7 +396,7 @@ WithCTE
396396
: : : +- OneRowRelation
397397
: : +- Project [3#x]
398398
: : +- SubqueryAlias t
399-
: : +- CTERelationRef xxxx, true, [3#x], false, false, 1, false
399+
: : +- CTERelationRef xxxx, true, [3#x], false, false, 1
400400
: +- OneRowRelation
401401
+- OneRowRelation
402402

@@ -425,9 +425,9 @@ WithCTE
425425
: : +- OneRowRelation
426426
: +- Project [c#x]
427427
: +- SubqueryAlias t
428-
: +- CTERelationRef xxxx, true, [c#x], false, false, 1, false
428+
: +- CTERelationRef xxxx, true, [c#x], false, false, 1
429429
+- SubqueryAlias t
430-
+- CTERelationRef xxxx, true, [c#x], false, false, 1, false
430+
+- CTERelationRef xxxx, true, [c#x], false, false, 1
431431

432432

433433
-- !query
@@ -448,14 +448,14 @@ WithCTE
448448
: +- SubqueryAlias t
449449
: +- Project [1#x]
450450
: +- SubqueryAlias t2
451-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
451+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
452452
:- CTERelationDef xxxx, false
453453
: +- SubqueryAlias t2
454454
: +- Project [2 AS 2#x]
455455
: +- OneRowRelation
456456
+- Project [1#x]
457457
+- SubqueryAlias t
458-
+- CTERelationRef xxxx, true, [1#x], false, false, 1, false
458+
+- CTERelationRef xxxx, true, [1#x], false, false, 1
459459

460460

461461
-- !query
@@ -480,10 +480,10 @@ WithCTE
480480
: +- SubqueryAlias t
481481
: +- Project [2#x]
482482
: +- SubqueryAlias aBC
483-
: +- CTERelationRef xxxx, true, [2#x], false, false, 1, false
483+
: +- CTERelationRef xxxx, true, [2#x], false, false, 1
484484
+- Project [2#x]
485485
+- SubqueryAlias t
486-
+- CTERelationRef xxxx, true, [2#x], false, false, 1, false
486+
+- CTERelationRef xxxx, true, [2#x], false, false, 1
487487

488488

489489
-- !query
@@ -506,7 +506,7 @@ WithCTE
506506
: : +- OneRowRelation
507507
: +- Project [2#x]
508508
: +- SubqueryAlias aBC
509-
: +- CTERelationRef xxxx, true, [2#x], false, false, 1, false
509+
: +- CTERelationRef xxxx, true, [2#x], false, false, 1
510510
+- OneRowRelation
511511

512512

@@ -530,15 +530,15 @@ WithCTE
530530
: +- SubqueryAlias t3
531531
: +- Project [1#x]
532532
: +- SubqueryAlias t1
533-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
533+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
534534
:- CTERelationDef xxxx, false
535535
: +- SubqueryAlias t2
536536
: +- Project [1#x]
537537
: +- SubqueryAlias t3
538-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
538+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
539539
+- Project [1#x]
540540
+- SubqueryAlias t2
541-
+- CTERelationRef xxxx, true, [1#x], false, false, 1, false
541+
+- CTERelationRef xxxx, true, [1#x], false, false, 1
542542

543543

544544
-- !query
@@ -561,12 +561,12 @@ WithCTE
561561
: +- SubqueryAlias cte_inner
562562
: +- Project [1#x]
563563
: +- SubqueryAlias cte_outer
564-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
564+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
565565
+- Project [1#x]
566566
+- SubqueryAlias __auto_generated_subquery_name
567567
+- Project [1#x]
568568
+- SubqueryAlias cte_inner
569-
+- CTERelationRef xxxx, true, [1#x], false, false, 1, false
569+
+- CTERelationRef xxxx, true, [1#x], false, false, 1
570570

571571

572572
-- !query
@@ -594,19 +594,19 @@ WithCTE
594594
: +- SubqueryAlias cte_inner_inner
595595
: +- Project [1#x]
596596
: +- SubqueryAlias cte_outer
597-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
597+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
598598
:- CTERelationDef xxxx, false
599599
: +- SubqueryAlias cte_inner
600600
: +- Project [1#x]
601601
: +- SubqueryAlias __auto_generated_subquery_name
602602
: +- Project [1#x]
603603
: +- SubqueryAlias cte_inner_inner
604-
: +- CTERelationRef xxxx, true, [1#x], false, false, 1, false
604+
: +- CTERelationRef xxxx, true, [1#x], false, false, 1
605605
+- Project [1#x]
606606
+- SubqueryAlias __auto_generated_subquery_name
607607
+- Project [1#x]
608608
+- SubqueryAlias cte_inner
609-
+- CTERelationRef xxxx, true, [1#x], false, false, 1, false
609+
+- CTERelationRef xxxx, true, [1#x], false, false, 1
610610

611611

612612
-- !query

0 commit comments

Comments
 (0)