Skip to content

Commit 4dcbdcd

Browse files
committed
[SPARK-29863][SQL] Rename EveryAgg/AnyAgg to BoolAnd/BoolOr
### What changes were proposed in this pull request?
Rename `EveryAgg`/`AnyAgg` to `BoolAnd`/`BoolOr`.

### Why are the changes needed?
Under ANSI mode, `every`, `any` and `some` are reserved keywords and can't be used as function names. `EveryAgg`/`AnyAgg` have several aliases, and I think it's better not to pick reserved keywords as the primary name.

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
Existing tests.

Closes apache#26486 from cloud-fan/naming.

Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent 942753a commit 4dcbdcd

File tree

7 files changed

+45
-45
lines changed

7 files changed

+45
-45
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -313,11 +313,11 @@ object FunctionRegistry {
313313
expression[CollectList]("collect_list"),
314314
expression[CollectSet]("collect_set"),
315315
expression[CountMinSketchAgg]("count_min_sketch"),
316-
expression[EveryAgg]("every"),
317-
expression[EveryAgg]("bool_and"),
318-
expression[AnyAgg]("any"),
319-
expression[AnyAgg]("some"),
320-
expression[AnyAgg]("bool_or"),
316+
expression[BoolAnd]("every"),
317+
expression[BoolAnd]("bool_and"),
318+
expression[BoolOr]("any"),
319+
expression[BoolOr]("some"),
320+
expression[BoolOr]("bool_or"),
321321

322322
// string functions
323323
expression[Ascii]("ascii"),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ abstract class UnevaluableBooleanAggBase(arg: Expression)
5252
false
5353
""",
5454
since = "3.0.0")
55-
case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
56-
override def nodeName: String = "Every"
55+
case class BoolAnd(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
56+
override def nodeName: String = "bool_and"
5757
}
5858

5959
@ExpressionDescription(
@@ -68,6 +68,6 @@ case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
6868
false
6969
""",
7070
since = "3.0.0")
71-
case class AnyAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
72-
override def nodeName: String = "Any"
71+
case class BoolOr(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
72+
override def nodeName: String = "bool_or"
7373
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ object ReplaceExpressions extends Rule[LogicalPlan] {
4747
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
4848
case e: RuntimeReplaceable => e.child
4949
case CountIf(predicate) => Count(new NullIf(predicate, Literal.FalseLiteral))
50-
case AnyAgg(arg) => Max(arg)
51-
case EveryAgg(arg) => Min(arg)
50+
case BoolOr(arg) => Max(arg)
51+
case BoolAnd(arg) => Min(arg)
5252
}
5353
}
5454

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
144144
assertSuccess(Sum('stringField))
145145
assertSuccess(Average('stringField))
146146
assertSuccess(Min('arrayField))
147-
assertSuccess(new EveryAgg('booleanField))
148-
assertSuccess(new AnyAgg('booleanField))
147+
assertSuccess(new BoolAnd('booleanField))
148+
assertSuccess(new BoolOr('booleanField))
149149

150150
assertError(Min('mapField), "min does not support ordering on type")
151151
assertError(Max('mapField), "max does not support ordering on type")

sql/core/src/test/resources/sql-tests/results/group-by.sql.out

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -293,31 +293,31 @@ struct<>
293293
-- !query 31
294294
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0
295295
-- !query 31 schema
296-
struct<every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
296+
struct<bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
297297
-- !query 31 output
298298
NULL NULL NULL NULL NULL
299299

300300

301301
-- !query 32
302302
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4
303303
-- !query 32 schema
304-
struct<every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
304+
struct<bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
305305
-- !query 32 output
306306
NULL NULL NULL NULL NULL
307307

308308

309309
-- !query 33
310310
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5
311311
-- !query 33 schema
312-
struct<every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
312+
struct<bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
313313
-- !query 33 output
314314
false true true false true
315315

316316

317317
-- !query 34
318318
SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k
319319
-- !query 34 schema
320-
struct<k:int,every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
320+
struct<k:int,bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
321321
-- !query 34 output
322322
1 false true true false true
323323
2 true true true true true
@@ -329,7 +329,7 @@ struct<k:int,every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any
329329
-- !query 35
330330
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false
331331
-- !query 35 schema
332-
struct<k:int,every(v):boolean>
332+
struct<k:int,bool_and(v):boolean>
333333
-- !query 35 output
334334
1 false
335335
3 false
@@ -339,7 +339,7 @@ struct<k:int,every(v):boolean>
339339
-- !query 36
340340
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL
341341
-- !query 36 schema
342-
struct<k:int,every(v):boolean>
342+
struct<k:int,bool_and(v):boolean>
343343
-- !query 36 output
344344
4 NULL
345345

@@ -380,7 +380,7 @@ SELECT every(1)
380380
struct<>
381381
-- !query 39 output
382382
org.apache.spark.sql.AnalysisException
383-
cannot resolve 'every(1)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7
383+
cannot resolve 'bool_and(1)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [int].; line 1 pos 7
384384

385385

386386
-- !query 40
@@ -389,7 +389,7 @@ SELECT some(1S)
389389
struct<>
390390
-- !query 40 output
391391
org.apache.spark.sql.AnalysisException
392-
cannot resolve 'any(1S)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7
392+
cannot resolve 'bool_or(1S)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [smallint].; line 1 pos 7
393393

394394

395395
-- !query 41
@@ -398,7 +398,7 @@ SELECT any(1L)
398398
struct<>
399399
-- !query 41 output
400400
org.apache.spark.sql.AnalysisException
401-
cannot resolve 'any(1L)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7
401+
cannot resolve 'bool_or(1L)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [bigint].; line 1 pos 7
402402

403403

404404
-- !query 42
@@ -407,7 +407,7 @@ SELECT every("true")
407407
struct<>
408408
-- !query 42 output
409409
org.apache.spark.sql.AnalysisException
410-
cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 7
410+
cannot resolve 'bool_and('true')' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [string].; line 1 pos 7
411411

412412

413413
-- !query 43
@@ -416,7 +416,7 @@ SELECT bool_and(1.0)
416416
struct<>
417417
-- !query 43 output
418418
org.apache.spark.sql.AnalysisException
419-
cannot resolve 'every(1.0BD)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7
419+
cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7
420420

421421

422422
-- !query 44
@@ -425,13 +425,13 @@ SELECT bool_or(1.0D)
425425
struct<>
426426
-- !query 44 output
427427
org.apache.spark.sql.AnalysisException
428-
cannot resolve 'any(1.0D)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [double].; line 1 pos 7
428+
cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7
429429

430430

431431
-- !query 45
432432
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
433433
-- !query 45 schema
434-
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
434+
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
435435
-- !query 45 output
436436
1 false false
437437
1 true false
@@ -448,7 +448,7 @@ struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST
448448
-- !query 46
449449
SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
450450
-- !query 46 schema
451-
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
451+
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
452452
-- !query 46 output
453453
1 false false
454454
1 true true
@@ -465,7 +465,7 @@ struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RA
465465
-- !query 47
466466
SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
467467
-- !query 47 schema
468-
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
468+
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
469469
-- !query 47 output
470470
1 false false
471471
1 true true
@@ -482,7 +482,7 @@ struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RA
482482
-- !query 48
483483
SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
484484
-- !query 48 schema
485-
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
485+
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
486486
-- !query 48 output
487487
1 false false
488488
1 true false
@@ -499,7 +499,7 @@ struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST
499499
-- !query 49
500500
SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
501501
-- !query 49 schema
502-
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
502+
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
503503
-- !query 49 output
504504
1 false false
505505
1 true true

sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -293,31 +293,31 @@ struct<>
293293
-- !query 31
294294
SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0
295295
-- !query 31 schema
296-
struct<CAST(udf(cast(every(v) as string)) AS BOOLEAN):boolean,CAST(udf(cast(any(v) as string)) AS BOOLEAN):boolean,any(v):boolean>
296+
struct<CAST(udf(cast(bool_and(v) as string)) AS BOOLEAN):boolean,CAST(udf(cast(bool_or(v) as string)) AS BOOLEAN):boolean,bool_or(v):boolean>
297297
-- !query 31 output
298298
NULL NULL NULL
299299

300300

301301
-- !query 32
302302
SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4
303303
-- !query 32 schema
304-
struct<CAST(udf(cast(every(cast(udf(cast(v as string)) as boolean)) as string)) AS BOOLEAN):boolean,any(v):boolean,any(v):boolean>
304+
struct<CAST(udf(cast(bool_and(cast(udf(cast(v as string)) as boolean)) as string)) AS BOOLEAN):boolean,bool_or(v):boolean,bool_or(v):boolean>
305305
-- !query 32 output
306306
NULL NULL NULL
307307

308308

309309
-- !query 33
310310
SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5
311311
-- !query 33 schema
312-
struct<every(v):boolean,CAST(udf(cast(any(v) as string)) AS BOOLEAN):boolean,any(v):boolean>
312+
struct<bool_and(v):boolean,CAST(udf(cast(bool_or(v) as string)) AS BOOLEAN):boolean,bool_or(v):boolean>
313313
-- !query 33 output
314314
false true true
315315

316316

317317
-- !query 34
318318
SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k)
319319
-- !query 34 schema
320-
struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean,CAST(udf(cast(any(v) as string)) AS BOOLEAN):boolean,any(v):boolean>
320+
struct<CAST(udf(cast(k as string)) AS INT):int,bool_and(v):boolean,CAST(udf(cast(bool_or(v) as string)) AS BOOLEAN):boolean,bool_or(v):boolean>
321321
-- !query 34 output
322322
1 false true true
323323
2 true true true
@@ -329,7 +329,7 @@ struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean,CAST(udf(cast(an
329329
-- !query 35
330330
SELECT udf(k), every(v) FROM test_agg GROUP BY k HAVING every(v) = false
331331
-- !query 35 schema
332-
struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean>
332+
struct<CAST(udf(cast(k as string)) AS INT):int,bool_and(v):boolean>
333333
-- !query 35 output
334334
1 false
335335
3 false
@@ -339,7 +339,7 @@ struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean>
339339
-- !query 36
340340
SELECT udf(k), udf(every(v)) FROM test_agg GROUP BY udf(k) HAVING every(v) IS NULL
341341
-- !query 36 schema
342-
struct<CAST(udf(cast(k as string)) AS INT):int,CAST(udf(cast(every(v) as string)) AS BOOLEAN):boolean>
342+
struct<CAST(udf(cast(k as string)) AS INT):int,CAST(udf(cast(bool_and(v) as string)) AS BOOLEAN):boolean>
343343
-- !query 36 output
344344
4 NULL
345345

@@ -380,7 +380,7 @@ SELECT every(udf(1))
380380
struct<>
381381
-- !query 39 output
382382
org.apache.spark.sql.AnalysisException
383-
cannot resolve 'every(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7
383+
cannot resolve 'bool_and(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [int].; line 1 pos 7
384384

385385

386386
-- !query 40
@@ -389,7 +389,7 @@ SELECT some(udf(1S))
389389
struct<>
390390
-- !query 40 output
391391
org.apache.spark.sql.AnalysisException
392-
cannot resolve 'any(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7
392+
cannot resolve 'bool_or(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [smallint].; line 1 pos 7
393393

394394

395395
-- !query 41
@@ -398,7 +398,7 @@ SELECT any(udf(1L))
398398
struct<>
399399
-- !query 41 output
400400
org.apache.spark.sql.AnalysisException
401-
cannot resolve 'any(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7
401+
cannot resolve 'bool_or(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [bigint].; line 1 pos 7
402402

403403

404404
-- !query 42
@@ -407,13 +407,13 @@ SELECT udf(every("true"))
407407
struct<>
408408
-- !query 42 output
409409
org.apache.spark.sql.AnalysisException
410-
cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 11
410+
cannot resolve 'bool_and('true')' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [string].; line 1 pos 11
411411

412412

413413
-- !query 43
414414
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
415415
-- !query 43 schema
416-
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
416+
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
417417
-- !query 43 output
418418
1 false false
419419
1 true false
@@ -430,7 +430,7 @@ struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST
430430
-- !query 44
431431
SELECT k, udf(udf(v)), some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
432432
-- !query 44 schema
433-
struct<k:int,CAST(udf(cast(cast(udf(cast(v as string)) as boolean) as string)) AS BOOLEAN):boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
433+
struct<k:int,CAST(udf(cast(cast(udf(cast(v as string)) as boolean) as string)) AS BOOLEAN):boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
434434
-- !query 44 output
435435
1 false false
436436
1 true true
@@ -447,7 +447,7 @@ struct<k:int,CAST(udf(cast(cast(udf(cast(v as string)) as boolean) as string)) A
447447
-- !query 45
448448
SELECT udf(udf(k)), v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
449449
-- !query 45 schema
450-
struct<CAST(udf(cast(cast(udf(cast(k as string)) as int) as string)) AS INT):int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
450+
struct<CAST(udf(cast(cast(udf(cast(k as string)) as int) as string)) AS INT):int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
451451
-- !query 45 output
452452
1 false false
453453
1 true true

sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ class ExplainSuite extends QueryTest with SharedSparkSession {
9595
// plan should show the rewritten aggregate expression.
9696
val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k")
9797
checkKeywordsExistsInExplain(df,
98-
"Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS any(v)#x, " +
99-
"max(v#x) AS any(v)#x]")
98+
"Aggregate [k#x], [k#x, min(v#x) AS bool_and(v)#x, max(v#x) AS bool_or(v)#x, " +
99+
"max(v#x) AS bool_or(v)#x]")
100100
}
101101
}
102102

0 commit comments

Comments
 (0)