Skip to content

Commit a341a57

Browse files
authored
fix: Update TPC-DS q36a golden file for Spark 4.0 decimal UNION widening change (#3915)
1 parent 9052f2b commit a341a57

File tree

4 files changed

+161
-7
lines changed

4 files changed

+161
-7
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
-- Automatically generated by CometTPCDSQuerySuite
2+
3+
-- !query schema
4+
struct<gross_margin:decimal(38,11),i_category:string,i_class:string,lochierarchy:int,rank_within_parent:int>
5+
-- !query output
6+
-0.43310777865 NULL NULL 2 1
7+
-0.44057752675 Home NULL 1 1
8+
-0.43759152110 Music NULL 1 2
9+
-0.43708103961 NULL NULL 1 3
10+
-0.43616253139 Shoes NULL 1 4
11+
-0.43567118609 Children NULL 1 5
12+
-0.43423932352 Sports NULL 1 6
13+
-0.43342977300 Electronics NULL 1 7
14+
-0.43243283121 Women NULL 1 8
15+
-0.43164166900 Men NULL 1 9
16+
-0.42516187690 Books NULL 1 10
17+
-0.42448713381 Jewelry NULL 1 11
18+
-0.73902664239 NULL shirts 0 1
19+
-0.61125804874 NULL country 0 2
20+
-0.53129803597 NULL dresses 0 3
21+
-0.51266635289 NULL athletic 0 4
22+
-0.45290387784 NULL mens 0 5
23+
-0.41288056662 NULL accessories 0 6
24+
-0.40784754677 NULL NULL 0 7
25+
-0.34254844861 NULL baseball 0 8
26+
-0.32511461676 NULL infants 0 9
27+
-0.44733955705 Books computers 0 1
28+
-0.44221358113 Books home repair 0 2
29+
-0.44131129175 Books romance 0 3
30+
-0.43954111564 Books history 0 4
31+
-0.43921337505 Books mystery 0 5
32+
-0.43904020269 Books sports 0 6
33+
-0.42821477000 Books travel 0 7
34+
-0.42609067296 Books cooking 0 8
35+
-0.42538995145 Books fiction 0 9
36+
-0.42446563616 Books arts 0 10
37+
-0.42424821312 Books parenting 0 11
38+
-0.41822014479 Books reference 0 12
39+
-0.41350839326 Books business 0 13
40+
-0.40935208137 Books science 0 14
41+
-0.40159380736 Books self-help 0 15
42+
-0.36957884843 Books entertainments 0 16
43+
-0.44602461557 Children school-uniforms 0 1
44+
-0.44141106040 Children toddlers 0 2
45+
-0.43479886701 Children infants 0 3
46+
-0.41900662972 Children newborn 0 4
47+
-0.41526603782 Children NULL 0 5
48+
-0.45347482219 Electronics personal 0 1
49+
-0.44349670350 Electronics stereo 0 2
50+
-0.44262427233 Electronics automotive 0 3
51+
-0.44115886173 Electronics portable 0 4
52+
-0.43972786652 Electronics memory 0 5
53+
-0.43889275272 Electronics scanners 0 6
54+
-0.43879181695 Electronics karoke 0 7
55+
-0.43743655150 Electronics dvd/vcr players 0 8
56+
-0.43737666391 Electronics cameras 0 9
57+
-0.43390499017 Electronics wireless 0 10
58+
-0.43163869754 Electronics audio 0 11
59+
-0.42958938670 Electronics camcorders 0 12
60+
-0.42872845804 Electronics musical 0 13
61+
-0.42228240153 Electronics televisions 0 14
62+
-0.41893847772 Electronics monitors 0 15
63+
-0.39793878023 Electronics disk drives 0 16
64+
-0.49051156861 Home NULL 0 1
65+
-0.48431476751 Home blinds/shades 0 2
66+
-0.47545837942 Home bathroom 0 3
67+
-0.45726228921 Home rugs 0 4
68+
-0.45540507569 Home furniture 0 5
69+
-0.45303572267 Home flatware 0 6
70+
-0.44755542058 Home tables 0 7
71+
-0.44419847781 Home wallpaper 0 8
72+
-0.44092345227 Home glassware 0 9
73+
-0.43877591834 Home decor 0 10
74+
-0.43765482554 Home accent 0 11
75+
-0.43188199219 Home bedding 0 12
76+
-0.43107417904 Home kids 0 13
77+
-0.42474436356 Home lighting 0 14
78+
-0.41783311109 Home curtains/drapes 0 15
79+
-0.41767111807 Home mattresses 0 16
80+
-0.40562188699 Home paint 0 17
81+
-0.45165056505 Jewelry jewelry boxes 0 1
82+
-0.44372227805 Jewelry estate 0 2
83+
-0.44251815033 Jewelry gold 0 3
84+
-0.43978127754 Jewelry consignment 0 4
85+
-0.43821750044 Jewelry custom 0 5
86+
-0.43439645036 Jewelry bracelets 0 6
87+
-0.43208398326 Jewelry loose stones 0 7
88+
-0.43060897375 Jewelry diamonds 0 8
89+
-0.42847505749 Jewelry costume 0 9
90+
-0.42667449062 Jewelry rings 0 10
91+
-0.41987969012 Jewelry mens watch 0 11
92+
-0.41624621973 Jewelry semi-precious 0 12
93+
-0.41148949162 Jewelry womens watch 0 13
94+
-0.39725668175 Jewelry birdal 0 14
95+
-0.39665274052 Jewelry pendants 0 15
96+
-0.38423525233 Jewelry earings 0 16
97+
-0.44464388888 Men shirts 0 1
98+
-0.43719860801 Men accessories 0 2
99+
-0.43164606665 Men sports-apparel 0 3
100+
-0.41530906677 Men pants 0 4
101+
-0.38332708895 Men NULL 0 5
102+
-0.47339698706 Music NULL 0 1
103+
-0.44193214675 Music rock 0 2
104+
-0.44008174914 Music country 0 3
105+
-0.43863444992 Music pop 0 4

spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
167167
{
168168
val decimalLiteral = Decimal(0.00)
169169
val cometDf = df.select($"dec" / decimalLiteral, $"dec" % decimalLiteral)
170+
checkSparkSchema(cometDf)
170171
checkSparkAnswerAndOperator(cometDf)
171172
}
172173
}
@@ -1215,6 +1216,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
12151216
val cometDf = df.select(
12161217
$"dec1" + $"dec2",
12171218
$"dec1" - $"dec2",
1219+
$"dec1" / $"dec2",
12181220
$"dec1" % $"dec2",
12191221
$"dec1" >= $"dec1",
12201222
$"dec1" === "1.0",
@@ -1229,6 +1231,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
12291231
.select(
12301232
$"dec1" + $"dec2",
12311233
$"dec1" - $"dec2",
1234+
$"dec1" / $"dec2",
12321235
$"dec1" % $"dec2",
12331236
$"dec1" >= $"dec1",
12341237
$"dec1" === "1.0",
@@ -1238,6 +1241,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
12381241
$"dec1" - decimalLiteral2)
12391242
.collect()
12401243
.toSeq)
1244+
checkSparkSchema(cometDf)
12411245
}
12421246
}
12431247
}
@@ -1253,7 +1257,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
12531257
sql("CREATE TABLE tbl (a INT) USING PARQUET")
12541258
sql("INSERT INTO tbl VALUES (0)")
12551259

1256-
val combinations = Seq((7, 3), (18, 10), (38, 4))
1260+
val combinations = Seq((7, 3), (18, 10), (27, 2), (38, 4))
12571261
for ((precision, scale) <- combinations) {
12581262
for (op <- Seq("+", "-", "*", "/", "%")) {
12591263
val left = s"CAST(1.00 AS DECIMAL($precision, $scale))"
@@ -1262,15 +1266,40 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
12621266
withSQLConf(
12631267
"spark.sql.optimizer.excludedRules" ->
12641268
"org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
1265-
1266-
checkSparkAnswerAndOperator(s"SELECT $left $op $right FROM tbl")
1269+
val df = sql(s"SELECT $left $op $right FROM tbl")
1270+
checkSparkSchema(df)
1271+
checkSparkAnswerAndOperator(df)
12671272
}
12681273
}
12691274
}
12701275
}
12711276
}
12721277
}
12731278

1279+
test("decimal division result type matches Spark") {
1280+
// Regression test for Comet applying DecimalPrecision.promote() on Spark 4, which overrides
1281+
// Spark's computed result type. For example, decimal(27,2)/decimal(27,2) should produce
1282+
// decimal(38,20) per Spark 4 semantics, but promote() would give decimal(38,11).
1283+
// checkSparkSchema verifies the result type; checkSparkAnswerAndOperator verifies the values.
1284+
withTable("tbl") {
1285+
sql("CREATE TABLE tbl (a INT) USING PARQUET")
1286+
sql("INSERT INTO tbl VALUES (1)")
1287+
withSQLConf(
1288+
CometConf.COMET_ENABLED.key -> "true",
1289+
"spark.sql.optimizer.excludedRules" ->
1290+
"org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
1291+
// (27, 2) hits the overflow-adjustment path where promote() and Spark 4 diverge
1292+
val combinations = Seq((5, 2), (18, 10), (27, 2), (38, 4))
1293+
for ((p, s) <- combinations) {
1294+
val df =
1295+
sql(s"SELECT CAST(1.00 AS DECIMAL($p, $s)) / CAST(3.00 AS DECIMAL($p, $s)) FROM tbl")
1296+
checkSparkSchema(df)
1297+
checkSparkAnswerAndOperator(df)
1298+
}
1299+
}
1300+
}
1301+
}
1302+
12741303
test("scalar decimal overflow - legacy mode produces null") {
12751304
// 1.1e19 * 1.1e19 = 1.21e38 fits in i128 (max ~1.7e38) but exceeds DECIMAL(38,0)'s
12761305
// max of 10^38-1, so CheckOverflow nulls the result in legacy (non-ANSI) mode.

spark/src/test/scala/org/apache/spark/sql/CometTPCDSQueryTestSuite.scala

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import org.apache.spark.sql.internal.SQLConf
3030
import org.apache.spark.sql.test.TestSparkSession
3131

3232
import org.apache.comet.CometConf
33+
import org.apache.comet.CometSparkSessionExtensions.isSpark40Plus
3334

3435
/**
3536
* Because we need to modify some methods of Spark `TPCDSQueryTestSuite` but they are private, we
@@ -68,8 +69,7 @@ class CometTPCDSQueryTestSuite extends QueryTest with TPCDSBase with CometSQLQue
6869
protected val baseResourcePath: String = {
6970
// resolve the resource path the same way `SQLQueryTestSuite` does
7071
getWorkspaceFilePath(
71-
"sql",
72-
"core",
72+
"spark",
7373
"src",
7474
"test",
7575
"resources",
@@ -161,7 +161,9 @@ class CometTPCDSQueryTestSuite extends QueryTest with TPCDSBase with CometSQLQue
161161
val configs = conf.map { case (k, v) =>
162162
s"$k=$v"
163163
}
164-
throw new Exception(s"${e.getMessage}\nError using configs:\n${configs.mkString("\n")}")
164+
throw new Exception(
165+
s"${e.getMessage}\nError using configs:\n${configs.mkString("\n")}",
166+
e)
165167
}
166168
}
167169
}
@@ -232,7 +234,10 @@ class CometTPCDSQueryTestSuite extends QueryTest with TPCDSBase with CometSQLQue
232234
s"tpcds-v2.7.0/$name.sql",
233235
classLoader = Thread.currentThread().getContextClassLoader)
234236
test(s"$name-v2.7") {
235-
val goldenFile = new File(s"$baseResourcePath/v2_7", s"$name.sql.out")
237+
val spark4File = new File(s"$baseResourcePath/v2_7-spark4_0", s"$name.sql.out")
238+
val goldenFile =
239+
if (isSpark40Plus && spark4File.exists()) spark4File
240+
else new File(s"$baseResourcePath/v2_7", s"$name.sql.out")
236241
joinConfs.foreach { conf =>
237242
val sortMergeJoin = sortMergeJoinConf == conf
238243
// Skip q72 for sort-merge join because it uses too many resources

spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,21 @@ abstract class CometTestBase
190190
internalCheckSparkAnswer(df, assertCometNative = false, withTol = Some(absTol))
191191
}
192192

193+
/**
194+
* Assert that the schema produced by a Comet-enabled execution matches the schema produced by
195+
* vanilla Spark for the same logical plan. Useful for catching regressions where Comet modifies
196+
* expression result types (e.g. decimal precision promotion).
197+
*/
198+
protected def checkSparkSchema(df: DataFrame): Unit = {
199+
var sparkSchema: StructType = null
200+
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
201+
sparkSchema = datasetOfRows(spark, df.logicalPlan).schema
202+
}
203+
assert(
204+
df.schema == sparkSchema,
205+
s"Schema mismatch:\nSpark: $sparkSchema\nComet: ${df.schema}")
206+
}
207+
193208
/**
194209
* Check that the query returns the correct results when Comet is enabled and that Comet
195210
* replaced all possible operators. Use the provided `absTol` when comparing floating-point

0 commit comments

Comments
 (0)