Skip to content

Commit 8b09237

Browse files
author
Yicong Huang
committed
fix: offset buffer with empty array
1 parent 9cdda52 commit 8b09237

File tree

4 files changed

+89
-2
lines changed

4 files changed

+89
-2
lines changed

vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
275275
@Override
276276
public List<ArrowBuf> getFieldBuffers() {
277277
List<ArrowBuf> result = new ArrayList<>(2);
278+
279+
// Ensure offset buffer has at least one entry for offset[0].
280+
// According to the Arrow specification, the offset buffer must have N+1 entries;
281+
// even when N=0, it must still contain the single entry [0].
282+
if (offsetBuffer.capacity() == 0) {
283+
offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
284+
}
285+
278286
setReaderAndWriterIndex();
279287
result.add(validityBuffer);
280288
result.add(offsetBuffer);
@@ -309,7 +317,8 @@ private void setReaderAndWriterIndex() {
309317
offsetBuffer.readerIndex(0);
310318
if (valueCount == 0) {
311319
validityBuffer.writerIndex(0);
312-
offsetBuffer.writerIndex(0);
320+
// Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec
321+
offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity()));
313322
} else {
314323
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
315324
offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);

vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
233233
@Override
234234
public List<ArrowBuf> getFieldBuffers() {
235235
List<ArrowBuf> result = new ArrayList<>(2);
236+
237+
// Ensure offset buffer has at least one entry for offset[0].
238+
// According to the Arrow specification, the offset buffer must have N+1 entries;
239+
// even when N=0, it must still contain the single entry [0].
240+
if (offsetBuffer.capacity() == 0) {
241+
offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
242+
}
243+
236244
setReaderAndWriterIndex();
237245
result.add(validityBuffer);
238246
result.add(offsetBuffer);
@@ -267,7 +275,8 @@ private void setReaderAndWriterIndex() {
267275
offsetBuffer.readerIndex(0);
268276
if (valueCount == 0) {
269277
validityBuffer.writerIndex(0);
270-
offsetBuffer.writerIndex(0);
278+
// Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec
279+
offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity()));
271280
} else {
272281
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
273282
offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);

vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,36 @@ public void testCopyValueSafeForExtensionType() throws Exception {
11001100
}
11011101
}
11021102

1103+
@Test
1104+
public void testNestedEmptyLargeListOffsetBuffer() {
1105+
// Test that nested LargeListVector properly allocates offset buffer
1106+
// even when nested writers are never invoked. According to Arrow spec,
1107+
// offset buffer must have N+1 entries. Even when N=0, it should contain [0].
1108+
try (LargeListVector outerList = LargeListVector.empty("outer", allocator)) {
1109+
// Setup LargeList<LargeList<Int>>
1110+
outerList.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType()));
1111+
LargeListVector innerList = (LargeListVector) outerList.getDataVector();
1112+
innerList.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
1113+
1114+
// Allocate outer only - simulates case where inner is never written to
1115+
outerList.allocateNew();
1116+
outerList.setValueCount(0);
1117+
1118+
// Get field buffers - this is what IPC serialization uses
1119+
List<ArrowBuf> innerBuffers = innerList.getFieldBuffers();
1120+
1121+
// Verify inner list offset buffer has at least OFFSET_WIDTH (8) bytes
1122+
assertTrue(
1123+
innerBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH,
1124+
"Inner LargeList offset buffer should have at least "
1125+
+ LargeListVector.OFFSET_WIDTH
1126+
+ " bytes for offset[0]");
1127+
1128+
// Verify offset[0] = 0
1129+
assertEquals(0L, innerList.getOffsetBuffer().getLong(0));
1130+
}
1131+
}
1132+
11031133
private void writeIntValues(UnionLargeListWriter writer, int[] values) {
11041134
writer.startList();
11051135
for (int v : values) {

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,6 +1379,45 @@ public void testCopyValueSafeForExtensionType() throws Exception {
13791379
}
13801380
}
13811381

1382+
@Test
1383+
public void testNestedEmptyListOffsetBuffer() {
1384+
// Test that 3-level nested ListVector properly allocates offset buffers
1385+
// even when nested writers are never invoked. According to Arrow spec,
1386+
// offset buffer must have N+1 entries. Even when N=0, it should contain [0].
1387+
try (ListVector level0 = ListVector.empty("level0", allocator)) {
1388+
// Setup List<List<List<Int>>> - 3 levels
1389+
level0.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
1390+
ListVector level1 = (ListVector) level0.getDataVector();
1391+
level1.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
1392+
ListVector level2 = (ListVector) level1.getDataVector();
1393+
level2.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
1394+
1395+
// Only allocate level0 - simulates case where all nested levels are empty
1396+
level0.allocateNew();
1397+
level0.setValueCount(0);
1398+
1399+
// Verify all levels have properly allocated offset buffers
1400+
List<ArrowBuf> level1Buffers = level1.getFieldBuffers();
1401+
List<ArrowBuf> level2Buffers = level2.getFieldBuffers();
1402+
1403+
assertTrue(
1404+
level1Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
1405+
"Level1 offset buffer should have at least "
1406+
+ BaseRepeatedValueVector.OFFSET_WIDTH
1407+
+ " bytes for offset[0]");
1408+
1409+
assertTrue(
1410+
level2Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
1411+
"Level2 offset buffer should have at least "
1412+
+ BaseRepeatedValueVector.OFFSET_WIDTH
1413+
+ " bytes for offset[0]");
1414+
1415+
// Verify offset[0] = 0 for all levels
1416+
assertEquals(0, level1.getOffsetBuffer().getInt(0));
1417+
assertEquals(0, level2.getOffsetBuffer().getInt(0));
1418+
}
1419+
}
1420+
13821421
private void writeIntValues(UnionListWriter writer, int[] values) {
13831422
writer.startList();
13841423
for (int v : values) {

0 commit comments

Comments
 (0)