Skip to content

Commit 26b8d57

Browse files
authored
fix: Mark cast from float/double to decimal as incompatible (#1372)
* add failing test
* Mark cast from float/double to decimal as incompat
* update docs
* update cast tests
* link to issue
* fix regressions
* use unique table name in test
* use withTable
* address feedback
1 parent 19c4405 commit 26b8d57

File tree

5 files changed

+58
-17
lines changed

5 files changed

+58
-17
lines changed

docs/source/user-guide/compatibility.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,13 @@ The following cast operations are generally compatible with Spark except for the
117117
| float | integer | |
118118
| float | long | |
119119
| float | double | |
120-
| float | decimal | |
121120
| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
122121
| double | boolean | |
123122
| double | byte | |
124123
| double | short | |
125124
| double | integer | |
126125
| double | long | |
127126
| double | float | |
128-
| double | decimal | |
129127
| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
130128
| decimal | byte | |
131129
| decimal | short | |
@@ -154,6 +152,8 @@ The following cast operations are not compatible with Spark for all inputs and a
154152
|-|-|-|
155153
| integer | decimal | No overflow check |
156154
| long | decimal | No overflow check |
155+
| float | decimal | There can be rounding differences |
156+
| double | decimal | There can be rounding differences |
157157
| string | float | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
158158
| string | double | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
159159
| string | decimal | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. Returns 0.0 instead of null if input contains no digits |

spark/src/main/scala/org/apache/comet/expressions/CometCast.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,15 +267,19 @@ object CometCast {
267267
case DataTypes.BooleanType | DataTypes.DoubleType | DataTypes.ByteType | DataTypes.ShortType |
268268
DataTypes.IntegerType | DataTypes.LongType =>
269269
Compatible()
270-
case _: DecimalType => Compatible()
270+
case _: DecimalType =>
271+
// https://github.com/apache/datafusion-comet/issues/1371
272+
Incompatible(Some("There can be rounding differences"))
271273
case _ => Unsupported
272274
}
273275

274276
private def canCastFromDouble(toType: DataType): SupportLevel = toType match {
275277
case DataTypes.BooleanType | DataTypes.FloatType | DataTypes.ByteType | DataTypes.ShortType |
276278
DataTypes.IntegerType | DataTypes.LongType =>
277279
Compatible()
278-
case _: DecimalType => Compatible()
280+
case _: DecimalType =>
281+
// https://github.com/apache/datafusion-comet/issues/1371
282+
Incompatible(Some("There can be rounding differences"))
279283
case _ => Unsupported
280284
}
281285

spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,8 @@ object ParquetGenerator {
212212
}
213213

214214
case class DataGenOptions(
215-
allowNull: Boolean,
216-
generateNegativeZero: Boolean,
217-
generateArray: Boolean,
218-
generateStruct: Boolean,
219-
generateMap: Boolean)
215+
allowNull: Boolean = true,
216+
generateNegativeZero: Boolean = true,
217+
generateArray: Boolean = false,
218+
generateStruct: Boolean = false,
219+
generateMap: Boolean = false)

spark/src/test/scala/org/apache/comet/CometCastSuite.scala

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,17 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
348348
castTest(generateFloats(), DataTypes.DoubleType)
349349
}
350350

351-
test("cast FloatType to DecimalType(10,2)") {
351+
ignore("cast FloatType to DecimalType(10,2)") {
352+
// https://github.com/apache/datafusion-comet/issues/1371
352353
castTest(generateFloats(), DataTypes.createDecimalType(10, 2))
353354
}
354355

356+
test("cast FloatType to DecimalType(10,2) - allow incompat") {
357+
withSQLConf(CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
358+
castTest(generateFloats(), DataTypes.createDecimalType(10, 2))
359+
}
360+
}
361+
355362
test("cast FloatType to StringType") {
356363
// https://github.com/apache/datafusion-comet/issues/312
357364
val r = new Random(0)
@@ -401,10 +408,17 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
401408
castTest(generateDoubles(), DataTypes.FloatType)
402409
}
403410

404-
test("cast DoubleType to DecimalType(10,2)") {
411+
ignore("cast DoubleType to DecimalType(10,2)") {
412+
// https://github.com/apache/datafusion-comet/issues/1371
405413
castTest(generateDoubles(), DataTypes.createDecimalType(10, 2))
406414
}
407415

416+
test("cast DoubleType to DecimalType(10,2) - allow incompat") {
417+
withSQLConf(CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
418+
castTest(generateDoubles(), DataTypes.createDecimalType(10, 2))
419+
}
420+
}
421+
408422
test("cast DoubleType to StringType") {
409423
// https://github.com/apache/datafusion-comet/issues/312
410424
val r = new Random(0)

spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,35 @@ import org.apache.spark.sql.internal.SQLConf
3232

3333
import org.apache.comet.CometConf
3434
import org.apache.comet.CometSparkSessionExtensions.isSpark34Plus
35+
import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
3536

3637
/**
3738
* Test suite dedicated to Comet native aggregate operator
3839
*/
3940
class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
4041
import testImplicits._
4142

43+
test("avg decimal") {
44+
withTempDir { dir =>
45+
val path = new Path(dir.toURI.toString, "test.parquet")
46+
val filename = path.toString
47+
val random = new Random(42)
48+
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
49+
ParquetGenerator.makeParquetFile(random, spark, filename, 10000, DataGenOptions())
50+
}
51+
val tableName = "avg_decimal"
52+
withTable(tableName) {
53+
val table = spark.read.parquet(filename).coalesce(1)
54+
table.createOrReplaceTempView(tableName)
55+
// we fall back to Spark for avg on decimal due to the following issue
56+
// https://github.com/apache/datafusion-comet/issues/1371
57+
// once this is fixed, we should change this test to
58+
// checkSparkAnswerAndNumOfAggregates
59+
checkSparkAnswer(s"SELECT c1, avg(c7) FROM $tableName GROUP BY c1 ORDER BY c1")
60+
}
61+
}
62+
}
63+
4264
test("stddev_pop should return NaN for some cases") {
4365
withSQLConf(
4466
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
@@ -867,10 +889,11 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
867889

868890
withSQLConf(
869891
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
892+
CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true",
870893
CometConf.COMET_SHUFFLE_MODE.key -> "native") {
871894
Seq(true, false).foreach { dictionaryEnabled =>
872895
withSQLConf("parquet.enable.dictionary" -> dictionaryEnabled.toString) {
873-
val table = "t1"
896+
val table = s"final_decimal_avg_$dictionaryEnabled"
874897
withTable(table) {
875898
sql(s"create table $table(a decimal(38, 37), b INT) using parquet")
876899
sql(s"insert into $table values(-0.0000000000000000000000000000000000002, 1)")
@@ -884,13 +907,13 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
884907
sql(s"insert into $table values(0.13344406545919155429936259114971302408, 5)")
885908
sql(s"insert into $table values(0.13344406545919155429936259114971302408, 5)")
886909

887-
checkSparkAnswerAndNumOfAggregates("SELECT b , AVG(a) FROM t1 GROUP BY b", 2)
888-
checkSparkAnswerAndNumOfAggregates("SELECT AVG(a) FROM t1", 2)
910+
checkSparkAnswerAndNumOfAggregates(s"SELECT b , AVG(a) FROM $table GROUP BY b", 2)
911+
checkSparkAnswerAndNumOfAggregates(s"SELECT AVG(a) FROM $table", 2)
889912
checkSparkAnswerAndNumOfAggregates(
890-
"SELECT b, MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM t1 GROUP BY b",
913+
s"SELECT b, MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM $table GROUP BY b",
891914
2)
892915
checkSparkAnswerAndNumOfAggregates(
893-
"SELECT MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM t1",
916+
s"SELECT MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM $table",
894917
2)
895918
}
896919
}
@@ -915,7 +938,7 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
915938
withSQLConf(
916939
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
917940
CometConf.COMET_SHUFFLE_MODE.key -> "native") {
918-
val table = "t1"
941+
val table = "avg_null_handling"
919942
withTable(table) {
920943
sql(s"create table $table(a double, b double) using parquet")
921944
sql(s"insert into $table values(1, 1.0)")

0 commit comments

Comments
 (0)