Skip to content

Commit 4cfe20f

Browse files
authored
Allocate a buffer of the correct length for ScalarValue::FixedSizeBinary in ScalarValue::to_array_of_size (#18903)
## Which issue does this PR close? - Closes #18870. We should open a follow-up issue that tracks re-use `FixedSizeBinaryArray::new_null` after upgrading arrow-rs. With this PR, we could backport it to #18843 if we wish. Alternatively, we could also wait for a fix by arrow-rs. ## Rationale for this change As we will have to wait for another arrow-rs update to get the fix for the issue apache/arrow-rs#8901 , we have a temporaray fix that basically calls the same operations from DataFusion's code. Once the fix is in the arrow-rs codebase, we can re-use `FixedSizeBinaryArray::new_null`. ## What changes are included in this PR? - A test. (`assert_eq!(result.as_fixed_size_binary().values().len(), 10);` returned 0 before) - Directly allocate the values buffer in DataFusion. ## Are these changes tested? Yes ## Are there any user-facing changes? Yes, the buffer will now be allocated, zeroed, and set to the expected length. Same as in the arrow-rs fix.
1 parent e54eb42 commit 4cfe20f

File tree

1 file changed

+30
-11
lines changed
  • datafusion/common/src/scalar

1 file changed

+30
-11
lines changed

datafusion/common/src/scalar/mod.rs

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,16 @@ use arrow::array::{
6060
BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, Decimal32Array,
6161
Decimal64Array, Decimal128Array, Decimal256Array, DictionaryArray,
6262
DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
63-
DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array,
64-
Float32Array, Float64Array, GenericListArray, Int8Array, Int16Array, Int32Array,
65-
Int64Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
66-
LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray,
67-
MutableArrayData, OffsetSizeTrait, PrimitiveArray, Scalar, StringArray,
68-
StringViewArray, StructArray, Time32MillisecondArray, Time32SecondArray,
69-
Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
70-
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
71-
UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray, new_empty_array,
72-
new_null_array,
63+
DurationSecondArray, FixedSizeBinaryArray, FixedSizeBinaryBuilder,
64+
FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
65+
Int8Array, Int16Array, Int32Array, Int64Array, IntervalDayTimeArray,
66+
IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
67+
LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait,
68+
PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray,
69+
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
70+
Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
71+
TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array,
72+
UInt64Array, UnionArray, new_empty_array, new_null_array,
7373
};
7474
use arrow::buffer::{BooleanBuffer, ScalarBuffer};
7575
use arrow::compute::kernels::cast::{CastOptions, cast_with_options};
@@ -3059,7 +3059,14 @@ impl ScalarValue {
30593059
)
30603060
.unwrap(),
30613061
),
3062-
None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
3062+
None => {
3063+
// TODO: Replace with FixedSizeBinaryArray::new_null once a fix for
3064+
// https://github.com/apache/arrow-rs/issues/8900 is in the used arrow-rs
3065+
// version.
3066+
let mut builder = FixedSizeBinaryBuilder::new(*s);
3067+
builder.append_nulls(size);
3068+
Arc::new(builder.finish())
3069+
}
30633070
},
30643071
ScalarValue::LargeBinary(e) => match e {
30653072
Some(value) => Arc::new(
@@ -5317,6 +5324,18 @@ mod tests {
53175324
assert_eq!(empty_array.len(), 0);
53185325
}
53195326

5327+
/// See https://github.com/apache/datafusion/issues/18870
5328+
#[test]
5329+
fn test_to_array_of_size_for_none_fsb() {
5330+
let sv = ScalarValue::FixedSizeBinary(5, None);
5331+
let result = sv
5332+
.to_array_of_size(2)
5333+
.expect("Failed to convert to array of size");
5334+
assert_eq!(result.len(), 2);
5335+
assert_eq!(result.null_count(), 2);
5336+
assert_eq!(result.as_fixed_size_binary().values().len(), 10);
5337+
}
5338+
53205339
#[test]
53215340
fn test_list_to_array_string() {
53225341
let scalars = vec![

0 commit comments

Comments
 (0)