Skip to content

Commit b89c3de

Browse files
zhulipeng authored and dongjoon-hyun committed
[SPARK-28310][SQL] Support (FIRST_VALUE|LAST_VALUE)(expr[ (IGNORE|RESPECT) NULLS]?) syntax
## What changes were proposed in this pull request?

According to the ANSI SQL 2011 standard:

![image](https://user-images.githubusercontent.com/698621/60855327-d01c6900-a235-11e9-9a1b-d438615a4673.png)

Teradata, Oracle, and Redshift already support this grammar:

- Teradata - https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/SUwCpTupqmlBJvi2mipOaA
- Oracle - https://docs.oracle.com/en/database/oracle/oracle-database/18/sqlrf/FIRST_VALUE.html#GUID-D454EC3F-370C-4C64-9B11-33FCB10D95EC
- Redshift - https://docs.aws.amazon.com/redshift/latest/dg/r_WF_first_value.html

PostgreSQL does not implement this grammar: https://www.postgresql.org/docs/devel/functions-window.html

> The SQL standard defines a RESPECT NULLS or IGNORE NULLS option for lead, lag, first_value, last_value, and nth_value. This is not implemented in PostgreSQL: the behavior is always the same as the standard's default, namely RESPECT NULLS.

## How was this patch tested?

Unit tests (UT).

Closes apache#25082 from lipzhu/SPARK-28310.

Authored-by: Zhu, Lipeng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent bbc2be4 commit b89c3de

File tree

4 files changed

+26
-2
lines changed

4 files changed

+26
-2
lines changed

docs/sql-keywords.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ Below is a list of all the keywords in Spark SQL.
117117
<tr><td>FIELDS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
118118
<tr><td>FILEFORMAT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
119119
<tr><td>FIRST</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
120+
<tr><td>FIRST_VALUE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
120121
<tr><td>FOLLOWING</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
121122
<tr><td>FOR</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
122123
<tr><td>FOREIGN</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
@@ -151,6 +152,7 @@ Below is a list of all the keywords in Spark SQL.
151152
<tr><td>JOIN</td><td>reserved</td><td>strict-non-reserved</td><td>reserved</td></tr>
152153
<tr><td>KEYS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
153154
<tr><td>LAST</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
155+
<tr><td>LAST_VALUE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
154156
<tr><td>LATERAL</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
155157
<tr><td>LAZY</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
156158
<tr><td>LEADING</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
@@ -219,6 +221,7 @@ Below is a list of all the keywords in Spark SQL.
219221
<tr><td>REPAIR</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
220222
<tr><td>REPLACE</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
221223
<tr><td>RESET</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
224+
<tr><td>RESPECT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
222225
<tr><td>RESTRICT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
223226
<tr><td>REVOKE</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
224227
<tr><td>RIGHT</td><td>reserved</td><td>strict-non-reserved</td><td>reserved</td></tr>

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,8 @@ primaryExpression
680680
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
681681
| CAST '(' expression AS dataType ')' #cast
682682
| STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
683-
| FIRST '(' expression (IGNORE NULLS)? ')' #first
684-
| LAST '(' expression (IGNORE NULLS)? ')' #last
683+
| (FIRST | FIRST_VALUE) '(' expression ((IGNORE | RESPECT) NULLS)? ')' #first
684+
| (LAST | LAST_VALUE) '(' expression ((IGNORE | RESPECT) NULLS)? ')' #last
685685
| POSITION '(' substr=valueExpression IN str=valueExpression ')' #position
686686
| constant #constantDefault
687687
| ASTERISK #star
@@ -1023,6 +1023,7 @@ ansiNonReserved
10231023
| REPAIR
10241024
| REPLACE
10251025
| RESET
1026+
| RESPECT
10261027
| RESTRICT
10271028
| REVOKE
10281029
| RLIKE
@@ -1184,6 +1185,7 @@ nonReserved
11841185
| FIELDS
11851186
| FILEFORMAT
11861187
| FIRST
1188+
| FIRST_VALUE
11871189
| FOLLOWING
11881190
| FOR
11891191
| FOREIGN
@@ -1214,6 +1216,7 @@ nonReserved
12141216
| ITEMS
12151217
| KEYS
12161218
| LAST
1219+
| LAST_VALUE
12171220
| LATERAL
12181221
| LAZY
12191222
| LEADING
@@ -1278,6 +1281,7 @@ nonReserved
12781281
| REPAIR
12791282
| REPLACE
12801283
| RESET
1284+
| RESPECT
12811285
| RESTRICT
12821286
| REVOKE
12831287
| RLIKE
@@ -1435,6 +1439,7 @@ FETCH: 'FETCH';
14351439
FIELDS: 'FIELDS';
14361440
FILEFORMAT: 'FILEFORMAT';
14371441
FIRST: 'FIRST';
1442+
FIRST_VALUE: 'FIRST_VALUE';
14381443
FOLLOWING: 'FOLLOWING';
14391444
FOR: 'FOR';
14401445
FOREIGN: 'FOREIGN';
@@ -1469,6 +1474,7 @@ ITEMS: 'ITEMS';
14691474
JOIN: 'JOIN';
14701475
KEYS: 'KEYS';
14711476
LAST: 'LAST';
1477+
LAST_VALUE: 'LAST_VALUE';
14721478
LATERAL: 'LATERAL';
14731479
LAZY: 'LAZY';
14741480
LEADING: 'LEADING';
@@ -1536,6 +1542,7 @@ RENAME: 'RENAME';
15361542
REPAIR: 'REPAIR';
15371543
REPLACE: 'REPLACE';
15381544
RESET: 'RESET';
1545+
RESPECT: 'RESPECT';
15391546
RESTRICT: 'RESTRICT';
15401547
REVOKE: 'REVOKE';
15411548
RIGHT: 'RIGHT';

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,15 @@ class ExpressionParserSuite extends AnalysisTest {
737737
assertEqual("last(a)", Last('a, Literal(false)).toAggregateExpression())
738738
}
739739

740+
test("Support respect nulls keywords for first_value and last_value") {
741+
assertEqual("first_value(a ignore nulls)", First('a, Literal(true)).toAggregateExpression())
742+
assertEqual("first_value(a respect nulls)", First('a, Literal(false)).toAggregateExpression())
743+
assertEqual("first_value(a)", First('a, Literal(false)).toAggregateExpression())
744+
assertEqual("last_value(a ignore nulls)", Last('a, Literal(true)).toAggregateExpression())
745+
assertEqual("last_value(a respect nulls)", Last('a, Literal(false)).toAggregateExpression())
746+
assertEqual("last_value(a)", Last('a, Literal(false)).toAggregateExpression())
747+
}
748+
740749
test("timestamp literals") {
741750
DateTimeTestUtils.outstandingTimezones.foreach { timeZone =>
742751
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone.getID) {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
381381
"fields",
382382
"fileformat",
383383
"first",
384+
"first_value",
384385
"following",
385386
"for",
386387
"foreign",
@@ -415,6 +416,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
415416
"join",
416417
"keys",
417418
"last",
419+
"last_value",
418420
"lateral",
419421
"lazy",
420422
"leading",
@@ -483,6 +485,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
483485
"repair",
484486
"replace",
485487
"reset",
488+
"respect",
486489
"restrict",
487490
"revoke",
488491
"right",
@@ -579,6 +582,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
579582
"except",
580583
"false",
581584
"fetch",
585+
"first_value",
582586
"for",
583587
"foreign",
584588
"from",
@@ -593,6 +597,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
593597
"into",
594598
"join",
595599
"is",
600+
"last_value",
596601
"leading",
597602
"left",
598603
"minute",

0 commit comments

Comments
 (0)