Skip to content

Commit 28d874c

Browse files
authored
chore: Improve documentation for CometBatchIterator and fix a potential issue (#2168)
1 parent 0cac7d9 commit 28d874c

File tree

1 file changed

+24
-7
lines changed

1 file changed

+24
-7
lines changed

spark/src/main/java/org/apache/comet/CometBatchIterator.java

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,17 @@
2626
import org.apache.comet.vector.NativeUtil;
2727

2828
/**
29-
* An iterator that can be used to get batches of Arrow arrays from a Spark iterator of
30-
* ColumnarBatch. It will consume input iterator and return Arrow arrays by addresses. This is
31-
* called by native code to retrieve Arrow arrays from Spark through JNI.
29+
* Iterator for fetching batches from JVM to native code. Usually called via JNI from native
30+
* ScanExec.
31+
*
32+
* <p>Batches are owned by the JVM. Native code can safely access the batch after calling `next` but
33+
* the native code must not retain references to the batch because the next call to `hasNext` will
34+
* signal to the JVM that the batch can be closed.
3235
*/
3336
public class CometBatchIterator {
34-
final Iterator<ColumnarBatch> input;
35-
final NativeUtil nativeUtil;
37+
private final Iterator<ColumnarBatch> input;
38+
private final NativeUtil nativeUtil;
39+
private ColumnarBatch previousBatch = null;
3640
private ColumnarBatch currentBatch = null;
3741

3842
CometBatchIterator(Iterator<ColumnarBatch> input, NativeUtil nativeUtil) {
@@ -41,11 +45,16 @@ public class CometBatchIterator {
4145
}
4246

4347
/**
44-
* Fetch the next input batch.
48+
* Fetch the next input batch and allow the previous batch to be closed (this may not happen
49+
* immediately).
4550
*
4651
* @return Number of rows in next batch or -1 if no batches left.
4752
*/
4853
public int hasNext() {
54+
55+
// release reference to previous batch
56+
previousBatch = null;
57+
4958
if (currentBatch == null) {
5059
if (input.hasNext()) {
5160
currentBatch = input.next();
@@ -59,7 +68,7 @@ public int hasNext() {
5968
}
6069

6170
/**
62-
* Get the next batches of Arrow arrays.
71+
* Get the next batch of Arrow arrays.
6372
*
6473
* @param arrayAddrs The addresses of the ArrowArray structures.
6574
* @param schemaAddrs The addresses of the ArrowSchema structures.
@@ -69,8 +78,16 @@ public int next(long[] arrayAddrs, long[] schemaAddrs) {
6978
if (currentBatch == null) {
7079
return -1;
7180
}
81+
82+
// export the batch using the Arrow C Data Interface
7283
int numRows = nativeUtil.exportBatch(arrayAddrs, schemaAddrs, currentBatch);
84+
85+
// keep a reference to the exported batch so that it doesn't get garbage collected
86+
// while the native code is still processing it
87+
previousBatch = currentBatch;
88+
7389
currentBatch = null;
90+
7491
return numRows;
7592
}
7693
}

0 commit comments

Comments
 (0)