Skip to content

Commit 42648f4

Browse files
committed
review comments and fix failing test.
1 parent 99727b8 commit 42648f4

File tree

2 files changed

+71
-66
lines changed

2 files changed

+71
-66
lines changed

docs/source/user-guide/compatibility.md

Lines changed: 54 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -73,76 +73,77 @@ Spark.
7373
The following cast operations are generally compatible with Spark except for the differences noted here.
7474

7575
| From Type | To Type | Notes |
76-
|-|-|-|
77-
| boolean | byte | |
78-
| boolean | short | |
76+
|-|---------|-|
77+
| boolean | byte | |
78+
| boolean | short | |
7979
| boolean | integer | |
80-
| boolean | long | |
81-
| boolean | float | |
82-
| boolean | double | |
83-
| boolean | string | |
80+
| boolean | long | |
81+
| boolean | float | |
82+
| boolean | double | |
83+
| boolean | string | |
8484
| byte | boolean | |
85-
| byte | short | |
85+
| byte | short | |
8686
| byte | integer | |
87-
| byte | long | |
88-
| byte | float | |
89-
| byte | double | |
87+
| byte | long | |
88+
| byte | float | |
89+
| byte | double | |
9090
| byte | decimal | |
91-
| byte | string | |
91+
| byte | string | |
9292
| short | boolean | |
93-
| short | byte | |
93+
| short | byte | |
9494
| short | integer | |
95-
| short | long | |
96-
| short | float | |
97-
| short | double | |
95+
| short | long | |
96+
| short | float | |
97+
| short | double | |
9898
| short | decimal | |
99-
| short | string | |
99+
| short | string | |
100100
| integer | boolean | |
101-
| integer | byte | |
102-
| integer | short | |
103-
| integer | long | |
104-
| integer | float | |
105-
| integer | double | |
106-
| integer | string | |
101+
| integer | byte | |
102+
| integer | short | |
103+
| integer | long | |
104+
| integer | float | |
105+
| integer | double | |
106+
| integer | string | |
107107
| long | boolean | |
108-
| long | byte | |
109-
| long | short | |
108+
| long | byte | |
109+
| long | short | |
110110
| long | integer | |
111-
| long | float | |
112-
| long | double | |
113-
| long | string | |
111+
| long | float | |
112+
| long | double | |
113+
| long | string | |
114114
| float | boolean | |
115-
| float | byte | |
116-
| float | short | |
115+
| float | byte | |
116+
| float | short | |
117117
| float | integer | |
118-
| float | long | |
119-
| float | double | |
120-
| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
118+
| float | long | |
119+
| float | double | |
120+
| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
121121
| double | boolean | |
122-
| double | byte | |
123-
| double | short | |
122+
| double | byte | |
123+
| double | short | |
124124
| double | integer | |
125-
| double | long | |
126-
| double | float | |
127-
| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
128-
| decimal | byte | |
129-
| decimal | short | |
125+
| double | long | |
126+
| double | float | |
127+
| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
128+
| decimal | byte | |
129+
| decimal | short | |
130130
| decimal | integer | |
131-
| decimal | long | |
132-
| decimal | float | |
133-
| decimal | double | |
134-
| decimal | string | There can be formatting differences in some case due to Spark using scientific notation where Comet does not |
131+
| decimal | long | |
132+
| decimal | float | |
133+
| decimal | double | |
134+
| decimal | string  | There can be formatting differences in some cases due to Spark using scientific notation where Comet does not |
135+
| decimal | decimal | |
135136
| string | boolean | |
136-
| string | byte | |
137-
| string | short | |
137+
| string | byte | |
138+
| string | short | |
138139
| string | integer | |
139-
| string | long | |
140-
| string | binary | |
141-
| string | date | Only supports years between 262143 BC and 262142 AD |
142-
| date | string | |
143-
| timestamp | long | |
144-
| timestamp | string | |
145-
| timestamp | date | |
140+
| string | long | |
141+
| string | binary | |
142+
| string | date | Only supports years between 262143 BC and 262142 AD |
143+
| date | string | |
144+
| timestamp | long | |
145+
| timestamp | string | |
146+
| timestamp | date | |
146147

147148
### Incompatible Casts
148149

spark/src/test/scala/org/apache/comet/CometCastSuite.scala

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ import scala.util.Random
2525
import scala.util.matching.Regex
2626

2727
import org.apache.hadoop.fs.Path
28-
import org.apache.spark.sql.{CometTestBase, DataFrame, SaveMode}
28+
import org.apache.spark.sql.{CometTestBase, DataFrame, Row, SaveMode}
2929
import org.apache.spark.sql.catalyst.expressions.Cast
3030
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
3131
import org.apache.spark.sql.functions.col
3232
import org.apache.spark.sql.internal.SQLConf
33-
import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType}
33+
import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType, StructField, StructType}
3434

3535
import org.apache.comet.expressions.{CometCast, CometEvalMode, Compatible}
3636

@@ -909,11 +909,14 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
909909
}
910910

911911
test("cast between decimals with different precision and scale") {
912-
// cast between default Decimal(38, 18) to Decimal(6,2)
913-
val values = Seq(BigDecimal("12345.6789"), BigDecimal("9876.5432"), BigDecimal("123.4567"))
914-
val df = withNulls(values)
915-
.toDF("b")
916-
.withColumn("a", col("b").cast(DecimalType(38, 28)))
912+
val rowData = Seq(
913+
Row(BigDecimal("12345.6789")),
914+
Row(BigDecimal("9876.5432")),
915+
Row(BigDecimal("123.4567")))
916+
val df = spark.createDataFrame(
917+
spark.sparkContext.parallelize(rowData),
918+
StructType(Seq(StructField("a", DataTypes.createDecimalType(10, 4)))))
919+
917920
castTest(df, DecimalType(6, 2))
918921
}
919922

@@ -1126,12 +1129,11 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
11261129
val cometMessage =
11271130
if (cometException.getCause != null) cometException.getCause.getMessage
11281131
else cometException.getMessage
1129-
// for comet decimal conversion throws ArrowError(string) from arrow - across spark version the message dont match.
1130-
if (sparkMessage.contains("NUMERIC_VALUE_OUT_OF_RANGE") && cometMessage.contains(
1131-
"Invalid argument error")) {
1132+
// for comet decimal conversion throws ArrowError(string) from arrow - across spark versions the messages don't match.
1133+
if (sparkMessage.contains("cannot be represented as")) {
11321134
assert(
1133-
sparkMessage.contains("cannot be represented as"),
1134-
cometMessage.contains("too large to store"))
1135+
cometMessage.contains("cannot be represented as") || cometMessage.contains(
1136+
"too large to store"))
11351137
} else {
11361138
if (CometSparkSessionExtensions.isSpark40Plus) {
11371139
// for Spark 4 we expect to sparkException carries the message
@@ -1151,7 +1153,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
11511153
.replace("[NUMERIC_VALUE_OUT_OF_RANGE] ", "")
11521154

11531155
if (sparkMessage.contains("cannot be represented as")) {
1154-
assert(cometMessage.contains("cannot be represented as"))
1156+
assert(
1157+
cometMessage.contains("cannot be represented as") || cometMessage.contains(
1158+
"too large to store"))
11551159
} else {
11561160
assert(cometMessageModified == sparkMessage)
11571161
}

0 commit comments

Comments
 (0)