Merged

docs/source/user-guide/latest/compatibility.md (2 additions & 2 deletions)

@@ -173,13 +173,15 @@ The following cast operations are generally compatible with Spark except for the
 | integer | long | |
 | integer | float | |
 | integer | double | |
+| integer | decimal | |
 | integer | string | |
 | long | boolean | |
 | long | byte | |
 | long | short | |
 | long | integer | |
 | long | float | |
 | long | double | |
+| long | decimal | |
 | long | string | |
 | float | boolean | |
 | float | byte | |
@@ -226,8 +228,6 @@ The following cast operations are not compatible with Spark for all inputs and a
<!--BEGIN:INCOMPAT_CAST_TABLE-->
| From Type | To Type | Notes |
|-|-|-|
| integer | decimal | No overflow check |
| long | decimal | No overflow check |
| float | decimal | There can be rounding differences |
| double | decimal | There can be rounding differences |
| string | float | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
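
This reclassification works because the native cast now mirrors Spark's overflow semantics: the integer is shifted into the decimal's scale, and the unscaled result must fit in `precision` digits. The following is a minimal standalone sketch of those semantics, not Comet's actual code; the function name, the simplified digit check, and the error-string layout are assumptions for illustration (the message text echoes the one cited in the test suite below).

```rust
/// Hypothetical mini-model of Spark's eval modes for int -> decimal casts;
/// names are invented here and do not match Comet's API.
#[derive(Clone, Copy)]
enum EvalMode {
    Legacy,
    Ansi,
    Try,
}

fn cast_int_to_decimal(
    v: i64,
    precision: u32,
    scale: u32,
    mode: EvalMode,
) -> Result<Option<i128>, String> {
    // Shift the integer into the decimal's scale, e.g. 12345 -> 1234500 for scale 2.
    let scaled = (v as i128).checked_mul(10_i128.pow(scale));
    match scaled {
        // The unscaled value must fit in `precision` digits.
        Some(s) if s.abs() < 10_i128.pow(precision) => Ok(Some(s)),
        // Overflow: ANSI mode errors, legacy and try-cast produce null.
        _ => match mode {
            EvalMode::Ansi => Err(format!(
                "[NUMERIC_VALUE_OUT_OF_RANGE] {v} cannot be represented as Decimal({precision}, {scale})"
            )),
            EvalMode::Legacy | EvalMode::Try => Ok(None),
        },
    }
}

fn main() {
    // -1117686336 needs 12 digits once scaled by 10^2, so Decimal(10, 2) overflows.
    assert!(cast_int_to_decimal(-1_117_686_336, 10, 2, EvalMode::Ansi).is_err());
    assert_eq!(cast_int_to_decimal(-1_117_686_336, 10, 2, EvalMode::Legacy), Ok(None));
    // 12345 scaled by 10^2 is 1234500, which fits in 10 digits.
    assert_eq!(cast_int_to_decimal(12_345, 10, 2, EvalMode::Ansi), Ok(Some(1_234_500)));
}
```

Only ANSI mode surfaces the error; legacy and try-cast degrade to null, which is why the previously ignored tests can be enabled for all modes.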

native/spark-expr/src/conversion_funcs/cast.rs (104 additions & 5 deletions)

@@ -19,7 +19,9 @@ use crate::utils::array_with_timezone;
 use crate::{timezone, BinaryOutputStyle};
 use crate::{EvalMode, SparkError, SparkResult};
 use arrow::array::builder::StringBuilder;
-use arrow::array::{DictionaryArray, GenericByteArray, StringArray, StructArray};
+use arrow::array::{
+    Decimal128Builder, DictionaryArray, GenericByteArray, StringArray, StructArray,
+};
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{
     ArrowDictionaryKeyType, ArrowNativeType, DataType, GenericBinaryType, Schema,
@@ -983,6 +985,9 @@ fn cast_array(
         {
             spark_cast_int_to_int(&array, eval_mode, from_type, to_type)
         }
+        (Int8 | Int16 | Int32 | Int64, Decimal128(precision, scale)) => {
+            cast_int_to_decimal128(&array, eval_mode, from_type, to_type, *precision, *scale)
+        }
         (Utf8, Int8 | Int16 | Int32 | Int64) => {
             cast_string_to_int::<i32>(to_type, &array, eval_mode)
         }
@@ -1143,9 +1148,6 @@ fn is_datafusion_spark_compatible(
                 | DataType::Utf8
         ),
         DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => {
-            // note that the cast from Int32/Int64 -> Decimal128 here is actually
-            // not compatible with Spark (no overflow checks) but we have tests that
-            // rely on this cast working, so we have to leave it here for now
             matches!(
                 to_type,
                 DataType::Boolean
@@ -1155,7 +1157,6 @@
                     | DataType::Int64
                     | DataType::Float32
                     | DataType::Float64
-                    | DataType::Decimal128(_, _)
                     | DataType::Utf8
             )
         }
@@ -1464,6 +1465,104 @@ where
     cast_float_to_string!(from, _eval_mode, f32, Float32Array, OffsetSize)
 }
 
+fn cast_int_to_decimal128_internal<T>(
+    array: &PrimitiveArray<T>,
+    precision: u8,
+    scale: i8,
+    eval_mode: EvalMode,
+) -> SparkResult<ArrayRef>
+where
+    T: ArrowPrimitiveType,
+    T::Native: Into<i128>,
+{
+    let mut builder = Decimal128Builder::with_capacity(array.len());
+    let multiplier = 10_i128.pow(scale as u32);
+
+    for i in 0..array.len() {
+        if array.is_null(i) {
+            builder.append_null();
+        } else {
+            let v = array.value(i).into();
+            let scaled = v.checked_mul(multiplier);
+            match scaled {
+                Some(scaled) => {
+                    if !is_validate_decimal_precision(scaled, precision) {
+                        match eval_mode {
+                            EvalMode::Ansi => {
+                                return Err(SparkError::NumericValueOutOfRange {
+                                    value: v.to_string(),
+                                    precision,
+                                    scale,
+                                });
+                            }
+                            EvalMode::Try | EvalMode::Legacy => builder.append_null(),
+                        }
+                    } else {
+                        builder.append_value(scaled);
+                    }
+                }
+                _ => {
+                    return Err(SparkError::NumericValueOutOfRange {
+                        value: v.to_string(),
+                        precision,
+                        scale,
+                    })
+                }
+            }
+        }
+    }
+    Ok(Arc::new(
+        builder.with_precision_and_scale(precision, scale)?.finish(),
+    ))
+}
+fn cast_int_to_decimal128(
+    array: &dyn Array,
+    eval_mode: EvalMode,
+    from_type: &DataType,
+    to_type: &DataType,
+    precision: u8,
+    scale: i8,
+) -> SparkResult<ArrayRef> {
+    match (from_type, to_type) {
+        (DataType::Int8, DataType::Decimal128(_p, _s)) => {
+            cast_int_to_decimal128_internal::<Int8Type>(
+                array.as_primitive::<Int8Type>(),
+                precision,
+                scale,
+                eval_mode,
+            )
+        }
+        (DataType::Int16, DataType::Decimal128(_p, _s)) => {
+            cast_int_to_decimal128_internal::<Int16Type>(
+                array.as_primitive::<Int16Type>(),
+                precision,
+                scale,
+                eval_mode,
+            )
+        }
+        (DataType::Int32, DataType::Decimal128(_p, _s)) => {
+            cast_int_to_decimal128_internal::<Int32Type>(
+                array.as_primitive::<Int32Type>(),
+                precision,
+                scale,
+                eval_mode,
+            )
+        }
+        (DataType::Int64, DataType::Decimal128(_p, _s)) => {
+            cast_int_to_decimal128_internal::<Int64Type>(
+                array.as_primitive::<Int64Type>(),
+                precision,
+                scale,
+                eval_mode,
+            )
+        }
+        _ => Err(SparkError::Internal(format!(
+            "Unsupported cast from datatype : {}",
+            from_type
+        ))),
+    }
+}
+
 fn spark_cast_int_to_int(
     array: &dyn Array,
     eval_mode: EvalMode,
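
For readers unfamiliar with the builder driven by `cast_int_to_decimal128_internal` above, here is a self-contained sketch of the arrow-rs `Decimal128Builder` usage it relies on: values are appended as unscaled `i128`s, nulls are appended directly, and precision/scale are attached before `finish()`. This is an illustration assuming a recent arrow-rs; it exercises only the calls that appear in the diff.

```rust
use arrow::array::{Array, Decimal128Builder};
use arrow::error::ArrowError;

fn main() -> Result<(), ArrowError> {
    // Unscaled representation: 1_234_500 with scale 2 means 12345.00.
    let mut builder = Decimal128Builder::with_capacity(2);
    builder.append_value(1_234_500);
    builder.append_null(); // how legacy/try modes record an overflowed row

    // with_precision_and_scale validates (10, 2) and errors if out of range.
    let array = builder.with_precision_and_scale(10, 2)?.finish();
    assert_eq!(array.len(), 2);
    assert!(array.is_null(1));
    Ok(())
}
```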

CometCast.scala

@@ -284,7 +284,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
       case DataTypes.FloatType | DataTypes.DoubleType =>
         Compatible()
       case _: DecimalType =>
-        Incompatible(Some("No overflow check"))
+        Compatible()
       case _ =>
         unsupported(DataTypes.IntegerType, toType)
     }
@@ -297,7 +297,7 @@
       case DataTypes.FloatType | DataTypes.DoubleType =>
         Compatible()
       case _: DecimalType =>
-        Incompatible(Some("No overflow check"))
+        Compatible()
       case _ =>
         unsupported(DataTypes.LongType, toType)
     }

spark/src/test/scala/org/apache/comet/CometCastSuite.scala (2 additions & 4 deletions)

@@ -322,8 +322,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     castTest(generateInts(), DataTypes.DoubleType)
   }
 
-  ignore("cast IntegerType to DecimalType(10,2)") {
-    // Comet should have failed with [NUMERIC_VALUE_OUT_OF_RANGE] -1117686336 cannot be represented as Decimal(10, 2)
+  test("cast IntegerType to DecimalType(10,2)") {
     castTest(generateInts(), DataTypes.createDecimalType(10, 2))
   }
 
@@ -369,8 +368,7 @@
     castTest(generateLongs(), DataTypes.DoubleType)
   }
 
-  ignore("cast LongType to DecimalType(10,2)") {
-    // Comet should have failed with [NUMERIC_VALUE_OUT_OF_RANGE] -1117686336 cannot be represented as Decimal(10, 2)
+  test("cast LongType to DecimalType(10,2)") {
     castTest(generateLongs(), DataTypes.createDecimalType(10, 2))
   }
 