Skip to content

Commit 1ed53be

Browse files
committed
Clarify RecordBatch clone overhead: ~100 bytes/batch, not data size
1 parent 8a94908 commit 1ed53be

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/mem_buffer.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,9 @@ impl MemBuffer {
230230
{
231231
for bucket_entry in table.buckets.iter() {
232232
if let Ok(batches) = bucket_entry.batches.read() {
233-
// RecordBatch uses Arc internally - clone is O(columns), not O(data)
233+
// RecordBatch clone is cheap: Arc<Schema> + Vec<Arc<Array>>
234+
// Only clones pointers (~100 bytes/batch), NOT the underlying data
235+
// e.g. querying a 4GB buffer split into ~10K batches adds ~1MB of clone overhead, not 4GB
234236
results.extend(batches.iter().cloned());
235237
}
236238
}
@@ -258,7 +260,7 @@ impl MemBuffer {
258260
&& let Ok(batches) = bucket.batches.read()
259261
&& !batches.is_empty()
260262
{
261-
// RecordBatch uses Arc internally - clone is O(columns), not O(data)
263+
// RecordBatch clone is cheap (~100 bytes/batch), data is Arc-shared
262264
partitions.push(batches.clone());
263265
}
264266
}

0 commit comments

Comments
 (0)