Skip to content

Commit 48b0571

Browse files
add slice with length test and make comment more verbose
1 parent 6fa4898 commit 48b0571

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

cpp/src/arrow/compute/kernels/scalar_cast_string.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -313,12 +313,14 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou
313313
std::shared_ptr<ArrayData> input_arr = input.ToArrayData();
314314
ArrayData* output = out->array_data().get();
315315

316-
// Slice buffers to reduce allocation when casting the offsets buffer
316+
// Slice buffers to minimize the output's offset. We need a small offset because
317+
// CastBinaryToBinaryOffsets() will reallocate the offsets buffer with size
318+
// (out_length + out_offset + 1) * sizeof(offset_type).
317319
int64_t input_offset = input_arr->offset;
318320
size_t input_offset_type_size = sizeof(typename I::offset_type);
319321
if (output->null_count != 0 && output->buffers[0]) {
320322
// Avoid reallocation of the validity buffer by allowing some padding bits
321-
output->offset = input_arr->offset % 8;
323+
output->offset = input_offset % 8;
322324
} else {
323325
output->offset = 0;
324326
}

cpp/src/arrow/compute/kernels/scalar_cast_test.cc

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3402,9 +3402,15 @@ TEST(Cast, StringToString) {
34023402

34033403
TEST(Cast, StringToStringWithOffset) {
34043404
// GH-43660: Check casting String Arrays with nonzero offset
3405+
std::vector<int64_t> offsets = {3, 8, 10, 12};
3406+
std::vector<int64_t> lengths = {5, 2, 1, 0};
3407+
34053408
for (auto from_type : {utf8(), large_utf8()}) {
34063409
for (auto to_type : {utf8(), large_utf8()}) {
3407-
for (int64_t offset : {3, 8, 10, 12}) {
3410+
for (size_t i = 0; i < offsets.size(); ++i) {
3411+
auto offset = offsets[i];
3412+
auto length = lengths[i];
3413+
34083414
auto input_with_nulls = R"([
34093415
"foo", null, "bar", null, "quu", "foo", "baz", "bar",
34103416
null, "bar", "baz", null
@@ -3414,6 +3420,9 @@ TEST(Cast, StringToStringWithOffset) {
34143420
auto output_arr_with_nulls = ArrayFromJSON(to_type, input_with_nulls);
34153421
CheckCast(input_arr_with_nulls->Slice(offset),
34163422
output_arr_with_nulls->Slice(offset));
3423+
// Slice with length
3424+
CheckCast(input_arr_with_nulls->Slice(offset, length),
3425+
output_arr_with_nulls->Slice(offset, length));
34173426

34183427
auto input_no_nulls = R"([
34193428
"foo", "aa", "bar", "bb", "quu", "foo", "baz", "bar",
@@ -3423,6 +3432,9 @@ TEST(Cast, StringToStringWithOffset) {
34233432
auto input_arr_no_nulls = ArrayFromJSON(from_type, input_no_nulls);
34243433
auto output_arr_no_nulls = ArrayFromJSON(to_type, input_no_nulls);
34253434
CheckCast(input_arr_no_nulls->Slice(offset), output_arr_no_nulls->Slice(offset));
3435+
// Slice with length
3436+
CheckCast(input_arr_no_nulls->Slice(offset, length),
3437+
output_arr_no_nulls->Slice(offset, length));
34263438
}
34273439
}
34283440
}

0 commit comments

Comments
 (0)