Skip to content

Commit 6d9b76e

Browse files
authored
Perf: Port arrow-rs optimization for get_buffer_memory_size and add fast path for no buffer for gc string view (#17008)
* Port arrow-rs optimization for get_buffer_memory_size for gc string view
* Add comments and fast path
1 parent b4e7147 commit 6d9b76e

File tree

1 file changed

+11
-1
lines changed
  • datafusion/physical-plan/src/coalesce

1 file changed

+11
-1
lines changed

datafusion/physical-plan/src/coalesce/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,12 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
228228
let Some(s) = c.as_string_view_opt() else {
229229
return Arc::clone(c);
230230
};
231+
232+
// Fast path: if the data buffers are empty, we can return the original array
233+
if s.data_buffers().is_empty() {
234+
return Arc::clone(c);
235+
}
236+
231237
let ideal_buffer_size: usize = s
232238
.views()
233239
.iter()
@@ -240,7 +246,11 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
240246
}
241247
})
242248
.sum();
243-
let actual_buffer_size = s.get_buffer_memory_size();
249+
250+
// We don't use get_buffer_memory_size here, because gc is for the contents of the
251+
// data buffers, not views and nulls.
252+
let actual_buffer_size =
253+
s.data_buffers().iter().map(|b| b.capacity()).sum::<usize>();
244254

245255
// Re-creating the array copies data and can be time consuming.
246256
// We only do it if the array is sparse

0 commit comments

Comments
 (0)