-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-43660: [C++][Compute] Avoid ZeroCopyCastExec when casting Binary offset -> Binary offset types #48171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-43660: [C++][Compute] Avoid ZeroCopyCastExec when casting Binary offset -> Binary offset types #48171
Changes from 7 commits
e5b0d43
dfec3b6
6966d4f
71ed279
49dfcb1
bbfd1e6
bb8c691
c8daf74
0b4c46b
8006925
4f613e4
e20f54b
6fa4898
48b0571
4929843
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
|
|
||
| #include "arrow/array/array_base.h" | ||
| #include "arrow/array/builder_binary.h" | ||
| #include "arrow/buffer.h" | ||
| #include "arrow/compute/kernels/base_arithmetic_internal.h" | ||
| #include "arrow/compute/kernels/codegen_internal.h" | ||
| #include "arrow/compute/kernels/common_internal.h" | ||
|
|
@@ -304,10 +305,33 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou | |
| } | ||
| } | ||
|
|
||
| // Start with a zero-copy cast, but change indices to expected size | ||
| RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out)); | ||
| return CastBinaryToBinaryOffsets<typename I::offset_type, typename O::offset_type>( | ||
| ctx, input, out->array_data().get()); | ||
| if constexpr (sizeof(typename I::offset_type) != sizeof(typename O::offset_type)) { | ||
| std::shared_ptr<ArrayData> input_arr = input.ToArrayData(); | ||
| ArrayData* output = out->array_data().get(); | ||
| output->length = input_arr->length; | ||
| // output->offset is set below | ||
| output->SetNullCount(input_arr->null_count); | ||
| output->buffers = std::move(input_arr->buffers); | ||
|
|
||
| // Slice buffers to reduce allocation when casting the offsets buffer | ||
zanmato1984 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| int64_t offset = input_arr->offset; | ||
scott-routledge2 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| size_t input_offset_type_size = sizeof(typename I::offset_type); | ||
| if (output->null_count != 0 && output->buffers[0]) { | ||
| // Avoid reallocation of the validity buffer by allowing some padding bits | ||
| output->offset = input_arr->offset % 8; | ||
scott-routledge2 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } else { | ||
| output->offset = 0; | ||
| } | ||
| if (output->buffers[0]) { | ||
| output->buffers[0] = SliceBuffer(output->buffers[0], offset / 8); | ||
| } | ||
| output->buffers[1] = SliceBuffer(output->buffers[1], offset * input_offset_type_size); | ||
|
||
|
|
||
| return CastBinaryToBinaryOffsets<typename I::offset_type, typename O::offset_type>( | ||
| ctx, input, out->array_data().get()); | ||
| } else { | ||
| return ZeroCopyCastExec(ctx, batch, out); | ||
| } | ||
| } | ||
|
|
||
| // String View -> Offset String | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.