4 changes: 2 additions & 2 deletions docs/source/user-guide/latest/compatibility.md
@@ -173,13 +173,15 @@ The following cast operations are generally compatible with Spark except for the
| integer | long | |
| integer | float | |
| integer | double | |
| integer | decimal | |
| integer | string | |
| long | boolean | |
| long | byte | |
| long | short | |
| long | integer | |
| long | float | |
| long | double | |
| long | decimal | |
| long | string | |
| float | boolean | |
| float | byte | |
@@ -226,8 +228,6 @@ The following cast operations are not compatible with Spark for all inputs and a
<!--BEGIN:INCOMPAT_CAST_TABLE-->
| From Type | To Type | Notes |
|-|-|-|
| integer | decimal | No overflow check |
| long | decimal | No overflow check |
| float | decimal | There can be rounding differences |
| double | decimal | There can be rounding differences |
| string | float | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
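Moving `integer -> decimal` and `long -> decimal` into the compatible table reflects the new overflow check in the native cast: out-of-range values are now detected and handled the way Spark handles them. A minimal spark-shell sketch of the Spark behavior the native cast is now expected to match (DECIMAL(10,2) holds at most 99999999.99, so Int.MaxValue is out of range):

```scala
// Legacy (non-ANSI) mode: a decimal overflow produces NULL.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT CAST(12345 AS DECIMAL(10,2))").show()      // 12345.00
spark.sql("SELECT CAST(2147483647 AS DECIMAL(10,2))").show() // NULL

// ANSI mode: the same overflow raises NUMERIC_VALUE_OUT_OF_RANGE instead.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT CAST(2147483647 AS DECIMAL(10,2))").show() // throws
```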
113 changes: 108 additions & 5 deletions native/spark-expr/src/conversion_funcs/cast.rs
@@ -19,7 +19,9 @@ use crate::utils::array_with_timezone;
use crate::{timezone, BinaryOutputStyle};
use crate::{EvalMode, SparkError, SparkResult};
use arrow::array::builder::StringBuilder;
use arrow::array::{DictionaryArray, GenericByteArray, StringArray, StructArray};
use arrow::array::{
Decimal128Builder, DictionaryArray, GenericByteArray, StringArray, StructArray,
};
use arrow::compute::can_cast_types;
use arrow::datatypes::{
ArrowDictionaryKeyType, ArrowNativeType, DataType, GenericBinaryType, Schema,
@@ -983,6 +985,9 @@ fn cast_array(
{
spark_cast_int_to_int(&array, eval_mode, from_type, to_type)
}
(Int8 | Int16 | Int32 | Int64, Decimal128(precision, scale)) => {
cast_int_to_decimal128(&array, eval_mode, from_type, to_type, *precision, *scale)
}
(Utf8, Int8 | Int16 | Int32 | Int64) => {
cast_string_to_int::<i32>(to_type, &array, eval_mode)
}
@@ -1143,9 +1148,6 @@ fn is_datafusion_spark_compatible(
| DataType::Utf8
),
DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => {
// note that the cast from Int32/Int64 -> Decimal128 here is actually
// not compatible with Spark (no overflow checks) but we have tests that
// rely on this cast working, so we have to leave it here for now
matches!(
to_type,
DataType::Boolean
@@ -1155,7 +1157,6 @@
| DataType::Int64
| DataType::Float32
| DataType::Float64
| DataType::Decimal128(_, _)
| DataType::Utf8
)
}
@@ -1464,6 +1465,108 @@ where
cast_float_to_string!(from, _eval_mode, f32, Float32Array, OffsetSize)
}

fn cast_int_to_decimal128_internal<T>(
array: &PrimitiveArray<T>,
precision: u8,
scale: i8,
eval_mode: EvalMode,
) -> SparkResult<ArrayRef>
where
T: ArrowPrimitiveType,
T::Native: Into<i128>,
{
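// A Decimal128(precision, scale) value is stored as an unscaled i128, so an integer v
// becomes v * 10^scale and must fit within `precision` decimal digits.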
let mut builder = Decimal128Builder::with_capacity(array.len());
let multiplier = 10_i128.pow(scale as u32);

for i in 0..array.len() {
if array.is_null(i) {
builder.append_null();
} else {
let v = array.value(i).into();
let scaled = v.checked_mul(multiplier);
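// checked_mul returns None when v * 10^scale overflows i128; that case is handled
// below the same way as a precision overflow.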
match scaled {
Some(scaled) => {
if !is_validate_decimal_precision(scaled, precision) {
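// Spark-style overflow handling: ANSI mode raises an error, Legacy/Try produce null.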
match eval_mode {
EvalMode::Ansi => {
return Err(SparkError::NumericValueOutOfRange {
value: v.to_string(),
precision,
scale,
});
}
EvalMode::Try | EvalMode::Legacy => builder.append_null(),
}
} else {
builder.append_value(scaled);
}
}
_ => match eval_mode {
EvalMode::Ansi => {
return Err(SparkError::NumericValueOutOfRange {
value: v.to_string(),
precision,
scale,
})
}
EvalMode::Legacy | EvalMode::Try => builder.append_null(),
},
}
}
}
Ok(Arc::new(
builder.with_precision_and_scale(precision, scale)?.finish(),
))
}

fn cast_int_to_decimal128(
array: &dyn Array,
eval_mode: EvalMode,
from_type: &DataType,
to_type: &DataType,
precision: u8,
scale: i8,
) -> SparkResult<ArrayRef> {
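// Dispatch on the source integer width; any other (from, to) combination is an internal error.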
match (from_type, to_type) {
(DataType::Int8, DataType::Decimal128(_p, _s)) => {
cast_int_to_decimal128_internal::<Int8Type>(
array.as_primitive::<Int8Type>(),
precision,
scale,
eval_mode,
)
}
(DataType::Int16, DataType::Decimal128(_p, _s)) => {
cast_int_to_decimal128_internal::<Int16Type>(
array.as_primitive::<Int16Type>(),
precision,
scale,
eval_mode,
)
}
(DataType::Int32, DataType::Decimal128(_p, _s)) => {
cast_int_to_decimal128_internal::<Int32Type>(
array.as_primitive::<Int32Type>(),
precision,
scale,
eval_mode,
)
}
(DataType::Int64, DataType::Decimal128(_p, _s)) => {
cast_int_to_decimal128_internal::<Int64Type>(
array.as_primitive::<Int64Type>(),
precision,
scale,
eval_mode,
)
}
_ => Err(SparkError::Internal(format!(
"Unsupported cast from datatype : {}",
from_type
))),
}
}

fn spark_cast_int_to_int(
array: &dyn Array,
eval_mode: EvalMode,
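The core of `cast_int_to_decimal128_internal` above is the unscaled-value check: an integer `v` cast to `DECIMAL(precision, scale)` is stored as `v * 10^scale`, and it only fits if that product stays within `precision` digits (the condition `is_validate_decimal_precision` tests). A small Scala sketch of the same arithmetic for `DECIMAL(10,2)`:

```scala
val precision = 10
val scale = 2

// 1234 is stored as the unscaled value 123400 (6 digits <= 10), i.e. 1234.00
val ok = BigInt(1234) * BigInt(10).pow(scale)
assert(ok.abs < BigInt(10).pow(precision))

// Int.MaxValue scales to 214748364700 (12 digits > 10), so it is out of range
val overflow = BigInt(Int.MaxValue) * BigInt(10).pow(scale)
assert(overflow.abs >= BigInt(10).pow(precision))
```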
@@ -284,7 +284,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
case DataTypes.FloatType | DataTypes.DoubleType =>
Compatible()
case _: DecimalType =>
Incompatible(Some("No overflow check"))
Compatible()
case _ =>
unsupported(DataTypes.IntegerType, toType)
}
@@ -297,7 +297,7 @@
case DataTypes.FloatType | DataTypes.DoubleType =>
Compatible()
case _: DecimalType =>
Incompatible(Some("No overflow check"))
Compatible()
case _ =>
unsupported(DataTypes.LongType, toType)
}
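In the Comet serde, `Compatible()` lets the cast run natively by default, while `Incompatible(Some(...))` keeps it on Spark unless the user explicitly opts in to incompatible casts. A rough sketch of what that opt-in looked like before this change; the config name `spark.comet.cast.allowIncompatible` is assumed from the Comet compatibility guide and may differ between versions:

```scala
// Previously required to let Comet run integer/long -> decimal casts natively
// (assumed config name; no longer needed now that the cast is marked Compatible).
spark.conf.set("spark.comet.cast.allowIncompatible", "true")
spark.range(5).selectExpr("CAST(id AS DECIMAL(10,2))").show()
```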
21 changes: 16 additions & 5 deletions spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -322,11 +322,24 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
castTest(generateInts(), DataTypes.DoubleType)
}

ignore("cast IntegerType to DecimalType(10,2)") {
// Comet should have failed with [NUMERIC_VALUE_OUT_OF_RANGE] -1117686336 cannot be represented as Decimal(10, 2)
test("cast IntegerType to DecimalType(10,2)") {
castTest(generateInts(), DataTypes.createDecimalType(10, 2))
}

test("cast IntegerType to DecimalType(10,2) overflow check") {
val intToDecimal10OverflowValues =
Seq(Int.MinValue, -100000000, -100000001, 100000000, 100000001, Int.MaxValue).toDF("a")
castTest(intToDecimal10OverflowValues, DataTypes.createDecimalType(10, 2))
}

test("cast IntegerType to DecimalType check arbitrary scale and precision") {
Seq(DecimalType.MAX_PRECISION, DecimalType.MAX_SCALE, 0, 10, 15)
.combinations(2)
.map({ c =>
castTest(generateInts(), DataTypes.createDecimalType(c.head, c.last))
})
}

test("cast IntegerType to StringType") {
castTest(generateInts(), DataTypes.StringType)
}
@@ -369,8 +382,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
castTest(generateLongs(), DataTypes.DoubleType)
}

ignore("cast LongType to DecimalType(10,2)") {
// Comet should have failed with [NUMERIC_VALUE_OUT_OF_RANGE] -1117686336 cannot be represented as Decimal(10, 2)
test("cast LongType to DecimalType(10,2)") {
castTest(generateLongs(), DataTypes.createDecimalType(10, 2))
}
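
The new boundary-value test above covers IntegerType only; LongType relies on `generateLongs()`. A hedged sketch (not part of this diff) of an analogous long overflow test, reusing the suite's `castTest` helper:

```scala
test("cast LongType to DecimalType(10,2) overflow check") {
  // Anything with more than 8 integral digits exceeds DECIMAL(10,2)'s maximum of 99999999.99.
  val longToDecimal10OverflowValues =
    Seq(Long.MinValue, -100000000L, -100000001L, 100000000L, 100000001L, Long.MaxValue).toDF("a")
  castTest(longToDecimal10OverflowValues, DataTypes.createDecimalType(10, 2))
}
```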

@@ -1232,7 +1244,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
case (None, None) =>
// neither system threw an exception
case (None, Some(e)) =>
// Spark succeeded but Comet failed
throw e
case (Some(e), None) =>
// Spark failed but Comet succeeded