@@ -24,8 +24,10 @@ import org.scalatest.Tag
2424
2525import org .apache .hadoop .fs .Path
2626import org .apache .spark .sql .{CometTestBase , Row }
27+ import org .apache .spark .sql .comet .CometWindowExec
28+ import org .apache .spark .sql .comet .execution .shuffle .CometShuffleExchangeExec
2729import org .apache .spark .sql .expressions .Window
28- import org .apache .spark .sql .functions .{count , lead }
30+ import org .apache .spark .sql .functions .{count , lead , sum }
2931import org .apache .spark .sql .internal .SQLConf
3032
3133import org .apache .comet .CometConf
@@ -39,12 +41,86 @@ class CometWindowExecSuite extends CometTestBase {
3941 super .test(testName, testTags : _* ) {
4042 withSQLConf(
4143 CometConf .COMET_EXEC_SHUFFLE_ENABLED .key -> " true" ,
44+ CometConf .COMET_EXEC_WINDOW_ENABLED .key -> " true" ,
4245 CometConf .COMET_NATIVE_SCAN_IMPL .key -> CometConf .SCAN_AUTO ) {
4346 testFun
4447 }
4548 }
4649 }
4750
// Checks that lag/lead agree with Spark when the requested offset (100) lies outside the
// single-row input, so the supplied default (a literal or a column) must be returned.
test("lead/lag should return the default value if the offset row does not exist") {
  // default value given as a literal
  val literalDefaultQuery =
    """
      |SELECT
      | lag(123, 100, 321) OVER (ORDER BY id) as lag,
      | lead(123, 100, 321) OVER (ORDER BY id) as lead
      |FROM (SELECT 1 as id) tmp
      """.stripMargin

  // default value taken from another column of the same row
  val columnDefaultQuery =
    """
      |SELECT
      | lag(123, 100, a) OVER (ORDER BY id) as lag,
      | lead(123, 100, a) OVER (ORDER BY id) as lead
      |FROM (SELECT 1 as id, 2 as a) tmp
      """.stripMargin

  withSQLConf(
    CometConf.COMET_ENABLED.key -> "true",
    CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
    CometConf.COMET_SHUFFLE_MODE.key -> "jvm") {
    Seq(literalDefaultQuery, columnDefaultQuery).foreach { query =>
      checkSparkAnswer(sql(query))
    }
  }
}
71+
// RANGE BETWEEN frames over an int column with long-typed boundaries must produce the same
// answer as Spark, and the resulting plan must not contain a CometWindowExec.
test("window query with rangeBetween") {
  // the column holds int values ...
  val input = Seq(1, 2, 4, 3, 2, 1).toDF("value")
  val descByValue = Window.orderBy($"value".desc)

  // ... while the frame boundaries are expressed as long
  val upToOneFollowing = descByValue.rangeBetween(Window.unboundedPreceding, 1L)
  val fromOneFollowing = descByValue.rangeBetween(1L, Window.unboundedFollowing)
  val result = input.select(
    $"value",
    sum($"value").over(upToOneFollowing),
    sum($"value").over(fromOneFollowing))

  // Comet does not support RANGE BETWEEN
  // https://github.com/apache/datafusion-comet/issues/1246
  val (_, cometPlan) = checkSparkAnswer(result)
  val nativeWindows = collect(cometPlan) { case w: CometWindowExec => w }
  assert(nativeWindows.isEmpty)
}
92+
// based on Spark's SQLWindowFunctionSuite test of the same name
//
// Runs the same window query under each Comet shuffle mode and checks both the answer
// against Spark and how many CometShuffleExchangeExec nodes end up in the executed plan.
test("window function: partition and order expressions") {
  // withTempView cleans up "windowData" once the body finishes, so the view registered
  // below does not leak into the other tests of this suite
  withTempView("windowData") {
    for (shuffleMode <- Seq("auto", "native", "jvm")) {
      withSQLConf(CometConf.COMET_SHUFFLE_MODE.key -> shuffleMode) {
        val df =
          Seq((1, "a", 5), (2, "a", 6), (3, "b", 7), (4, "b", 8), (5, "c", 9), (6, "c", 10)).toDF(
            "month",
            "area",
            "product")
        df.createOrReplaceTempView("windowData")
        val df2 = sql("""
            |select month, area, product, sum(product + 1) over (partition by 1 order by 2)
            |from windowData
            """.stripMargin)
        checkSparkAnswer(df2)
        val cometShuffles = collect(df2.queryExecution.executedPlan) {
          case _: CometShuffleExchangeExec => true
        }
        if (shuffleMode == "jvm" || shuffleMode == "auto") {
          assert(cometShuffles.length == 1)
        } else {
          // we fall back to Spark for shuffle because we do not support
          // native shuffle with a LocalTableScan input, and we do not fall
          // back to Comet columnar shuffle due to
          // https://github.com/apache/datafusion-comet/issues/1248
          assert(cometShuffles.isEmpty)
        }
      }
    }
  }
}
123+
48124 test(
49125 " fall back to Spark when the partition spec and order spec are not the same for window function" ) {
50126 withTempView(" test" ) {
@@ -289,7 +365,7 @@ class CometWindowExecSuite extends CometTestBase {
289365
290366 // TODO: COUNT with ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW was previously ignored for incorrect results
291367 // (ordering issue swapped cnt values for rows with same partition); it is now enabled - confirm the fix and drop this TODO
292- ignore (" window: COUNT with ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" ) {
368+ test (" window: COUNT with ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" ) {
293369 withTempDir { dir =>
294370 (0 until 30 )
295371 .map(i => (i % 3 , i % 5 , i))
@@ -310,7 +386,6 @@ class CometWindowExecSuite extends CometTestBase {
310386 }
311387
312388 // TODO: SUM with ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING produces incorrect results
313- // Returns wrong sum_c values - ordering issue causes swapped values for rows with same partition
314389 ignore(" window: SUM with ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING" ) {
315390 withTempDir { dir =>
316391 (0 until 30 )
@@ -354,7 +429,6 @@ class CometWindowExecSuite extends CometTestBase {
354429 }
355430
356431 // TODO: SUM with ROWS BETWEEN produces incorrect results
357- // Returns wrong sum_c values for some rows
358432 ignore(" window: SUM with ROWS BETWEEN 2 PRECEDING AND CURRENT ROW" ) {
359433 withTempDir { dir =>
360434 (0 until 30 )
@@ -530,7 +604,6 @@ class CometWindowExecSuite extends CometTestBase {
530604 }
531605
532606 // TODO: LAG produces incorrect results
533- // Returns wrong lag_c values - ordering issue in results
534607 ignore(" window: LAG with default offset" ) {
535608 withTempDir { dir =>
536609 (0 until 30 )
@@ -552,7 +625,6 @@ class CometWindowExecSuite extends CometTestBase {
552625 }
553626
554627 // TODO: LAG with offset 2 produces incorrect results
555- // Returns wrong lag_c_2 values - ordering issue in results
556628 ignore(" window: LAG with offset 2 and default value" ) {
557629 withTempDir { dir =>
558630 (0 until 30 )
@@ -574,7 +646,6 @@ class CometWindowExecSuite extends CometTestBase {
574646 }
575647
576648 // TODO: LEAD produces incorrect results
577- // Returns wrong lead_c values - ordering issue in results
578649 ignore(" window: LEAD with default offset" ) {
579650 withTempDir { dir =>
580651 (0 until 30 )
@@ -596,7 +667,6 @@ class CometWindowExecSuite extends CometTestBase {
596667 }
597668
598669 // TODO: LEAD with offset 2 produces incorrect results
599- // Returns wrong lead_c_2 values - ordering issue in results
600670 ignore(" window: LEAD with offset 2 and default value" ) {
601671 withTempDir { dir =>
602672 (0 until 30 )
@@ -662,7 +732,6 @@ class CometWindowExecSuite extends CometTestBase {
662732 }
663733
664734 // TODO: NTH_VALUE returns incorrect results - produces 0 instead of null for first row
665- // and incorrect values for subsequent rows in partition
666735 ignore(" window: NTH_VALUE with position 2" ) {
667736 withTempDir { dir =>
668737 (0 until 30 )
@@ -706,7 +775,6 @@ class CometWindowExecSuite extends CometTestBase {
706775 }
707776
708777 // TODO: Multiple window functions with mixed frame types (RowFrame and RangeFrame)
709- // produces incorrect row_num values - ordering issue in results
710778 ignore(" window: multiple window functions in single query" ) {
711779 withTempDir { dir =>
712780 (0 until 30 )
@@ -933,7 +1001,6 @@ class CometWindowExecSuite extends CometTestBase {
9331001 }
9341002
9351003 // TODO: ROWS BETWEEN with negative offset produces incorrect results
936- // Returns wrong values for avg_c calculation
9371004 ignore(" window: ROWS BETWEEN with negative offset" ) {
9381005 withTempDir { dir =>
9391006 (0 until 30 )
@@ -955,7 +1022,6 @@ class CometWindowExecSuite extends CometTestBase {
9551022 }
9561023
9571024 // TODO: All ranking functions together produce incorrect row_num values
958- // Ordering issue causes row numbers to be swapped for rows with same partition/order values
9591025 ignore(" window: all ranking functions together" ) {
9601026 withTempDir { dir =>
9611027 (0 until 30 )
@@ -980,5 +1046,4 @@ class CometWindowExecSuite extends CometTestBase {
9801046 checkSparkAnswerAndOperator(df)
9811047 }
9821048 }
983-
9841049}
0 commit comments