Skip to content

Commit 6d8505a

Browse files
FiV0lriggs
authored andcommitted
apacheGH-30866: [Java] fix SplitAndTransfer throws for (0,0) if vector empty (apache#41066)
This is addresses https://issues.apache.org/jira/browse/ARROW-15382 and is reopening of apache#12250 (which I asked to be reopened). I tried to address all the comments from the previous discussion, added some more tests and fixed an issue in the old commit. * GitHub Issue: apache#30866 Authored-by: Finn Völkel <finn.volkel@gmail.com> Signed-off-by: David Li <li.davidm96@gmail.com>
1 parent ed7efa2 commit 6d8505a

File tree

5 files changed

+140
-31
lines changed

5 files changed

+140
-31
lines changed

java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -763,16 +763,14 @@ public void transferTo(BaseLargeVariableWidthVector target) {
763763
*/
764764
public void splitAndTransferTo(int startIndex, int length,
765765
BaseLargeVariableWidthVector target) {
766-
Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount,
767-
"Invalid startIndex: %s", startIndex);
768-
Preconditions.checkArgument(startIndex + length <= valueCount,
769-
"Invalid length: %s", length);
766+
Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
767+
"Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
770768
compareTypes(target, "splitAndTransferTo");
771769
target.clear();
772-
splitAndTransferValidityBuffer(startIndex, length, target);
773-
splitAndTransferOffsetBuffer(startIndex, length, target);
774-
target.setLastSet(length - 1);
775770
if (length > 0) {
771+
splitAndTransferValidityBuffer(startIndex, length, target);
772+
splitAndTransferOffsetBuffer(startIndex, length, target);
773+
target.setLastSet(length - 1);
776774
target.setValueCount(length);
777775
}
778776
}

java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -808,10 +808,10 @@ public void splitAndTransferTo(int startIndex, int length,
808808
"Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
809809
compareTypes(target, "splitAndTransferTo");
810810
target.clear();
811-
splitAndTransferValidityBuffer(startIndex, length, target);
812-
splitAndTransferOffsetBuffer(startIndex, length, target);
813-
target.setLastSet(length - 1);
814811
if (length > 0) {
812+
splitAndTransferValidityBuffer(startIndex, length, target);
813+
splitAndTransferOffsetBuffer(startIndex, length, target);
814+
target.setLastSet(length - 1);
815815
target.setValueCount(length);
816816
}
817817
}

java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -552,21 +552,23 @@ public void transfer() {
552552
public void splitAndTransfer(int startIndex, int length) {
553553
Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
554554
"Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
555-
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
556-
final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
557555
to.clear();
558-
to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
559-
/* splitAndTransfer offset buffer */
560-
for (int i = 0; i < length + 1; i++) {
561-
final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
562-
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
556+
if (length > 0) {
557+
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
558+
final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
559+
to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
560+
/* splitAndTransfer offset buffer */
561+
for (int i = 0; i < length + 1; i++) {
562+
final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
563+
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
564+
}
565+
/* splitAndTransfer validity buffer */
566+
splitAndTransferValidityBuffer(startIndex, length, to);
567+
/* splitAndTransfer data buffer */
568+
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
569+
to.lastSet = length - 1;
570+
to.setValueCount(length);
563571
}
564-
/* splitAndTransfer validity buffer */
565-
splitAndTransferValidityBuffer(startIndex, length, to);
566-
/* splitAndTransfer data buffer */
567-
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
568-
to.lastSet = length - 1;
569-
to.setValueCount(length);
570572
}
571573

572574
/*

java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ public void testInvalidStartIndex() {
166166
IllegalArgumentException.class,
167167
() -> tp.splitAndTransfer(valueCount, 10));
168168

169-
assertEquals("Invalid startIndex: 500", e.getMessage());
169+
assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage());
170170
}
171171
}
172172

@@ -185,7 +185,7 @@ public void testInvalidLength() {
185185
IllegalArgumentException.class,
186186
() -> tp.splitAndTransfer(0, valueCount * 2));
187187

188-
assertEquals("Invalid length: 1000", e.getMessage());
188+
assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage());
189189
}
190190
}
191191

java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java

Lines changed: 115 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import org.apache.arrow.memory.BufferAllocator;
3030
import org.apache.arrow.memory.RootAllocator;
31+
import org.apache.arrow.vector.complex.DenseUnionVector;
3132
import org.apache.arrow.vector.complex.FixedSizeListVector;
3233
import org.apache.arrow.vector.complex.ListVector;
3334
import org.apache.arrow.vector.complex.MapVector;
@@ -49,7 +50,7 @@ public class TestSplitAndTransfer {
4950
public void init() {
5051
allocator = new RootAllocator(Long.MAX_VALUE);
5152
}
52-
53+
5354
@After
5455
public void terminate() throws Exception {
5556
allocator.close();
@@ -65,21 +66,130 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S
6566
}
6667
vector.setValueCount(valueCount);
6768
}
68-
69+
70+
private void populateIntVector(final IntVector vector, int valueCount) {
71+
for (int i = 0; i < valueCount; i++) {
72+
vector.set(i, i);
73+
}
74+
vector.setValueCount(valueCount);
75+
}
76+
77+
private void populateDenseUnionVector(final DenseUnionVector vector, int valueCount) {
78+
VarCharVector varCharVector = vector.addOrGet("varchar", FieldType.nullable(new ArrowType.Utf8()),
79+
VarCharVector.class);
80+
BigIntVector intVector = vector.addOrGet("int",
81+
FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
82+
83+
for (int i = 0; i < valueCount; i++) {
84+
vector.setTypeId(i, (byte) (i % 2));
85+
if (i % 2 == 0) {
86+
final String s = String.format("%010d", i);
87+
varCharVector.setSafe(i / 2, s.getBytes(StandardCharsets.UTF_8));
88+
} else {
89+
intVector.setSafe(i / 2, i);
90+
}
91+
}
92+
vector.setValueCount(valueCount);
93+
}
94+
95+
@Test
96+
public void testWithEmptyVector() {
97+
// MapVector use TransferImpl from ListVector
98+
ListVector listVector = ListVector.empty("", allocator);
99+
TransferPair transferPair = listVector.getTransferPair(allocator);
100+
transferPair.splitAndTransfer(0, 0);
101+
assertEquals(0, transferPair.getTo().getValueCount());
102+
// BaseFixedWidthVector
103+
IntVector intVector = new IntVector("", allocator);
104+
transferPair = intVector.getTransferPair(allocator);
105+
transferPair.splitAndTransfer(0, 0);
106+
assertEquals(0, transferPair.getTo().getValueCount());
107+
// BaseVariableWidthVector
108+
VarCharVector varCharVector = new VarCharVector("", allocator);
109+
transferPair = varCharVector.getTransferPair(allocator);
110+
transferPair.splitAndTransfer(0, 0);
111+
assertEquals(0, transferPair.getTo().getValueCount());
112+
// BaseLargeVariableWidthVector
113+
LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator);
114+
transferPair = largeVarCharVector.getTransferPair(allocator);
115+
transferPair.splitAndTransfer(0, 0);
116+
assertEquals(0, transferPair.getTo().getValueCount());
117+
// StructVector
118+
StructVector structVector = StructVector.empty("", allocator);
119+
transferPair = structVector.getTransferPair(allocator);
120+
transferPair.splitAndTransfer(0, 0);
121+
assertEquals(0, transferPair.getTo().getValueCount());
122+
// FixedSizeListVector
123+
FixedSizeListVector fixedSizeListVector = FixedSizeListVector.empty("", 0, allocator);
124+
transferPair = fixedSizeListVector.getTransferPair(allocator);
125+
transferPair.splitAndTransfer(0, 0);
126+
assertEquals(0, transferPair.getTo().getValueCount());
127+
// FixedSizeBinaryVector
128+
FixedSizeBinaryVector fixedSizeBinaryVector = new FixedSizeBinaryVector("", allocator, 4);
129+
transferPair = fixedSizeBinaryVector.getTransferPair(allocator);
130+
transferPair.splitAndTransfer(0, 0);
131+
assertEquals(0, transferPair.getTo().getValueCount());
132+
// UnionVector
133+
UnionVector unionVector = UnionVector.empty("", allocator);
134+
transferPair = unionVector.getTransferPair(allocator);
135+
transferPair.splitAndTransfer(0, 0);
136+
assertEquals(0, transferPair.getTo().getValueCount());
137+
// DenseUnionVector
138+
DenseUnionVector duv = DenseUnionVector.empty("", allocator);
139+
transferPair = duv.getTransferPair(allocator);
140+
transferPair.splitAndTransfer(0, 0);
141+
assertEquals(0, transferPair.getTo().getValueCount());
142+
143+
// non empty from vector
144+
145+
// BaseFixedWidthVector
146+
IntVector fromIntVector = new IntVector("", allocator);
147+
fromIntVector.allocateNew(100);
148+
populateIntVector(fromIntVector, 100);
149+
transferPair = fromIntVector.getTransferPair(allocator);
150+
IntVector toIntVector = (IntVector) transferPair.getTo();
151+
transferPair.splitAndTransfer(0, 0);
152+
assertEquals(0, toIntVector.getValueCount());
153+
154+
transferPair.splitAndTransfer(50, 0);
155+
assertEquals(0, toIntVector.getValueCount());
156+
157+
transferPair.splitAndTransfer(100, 0);
158+
assertEquals(0, toIntVector.getValueCount());
159+
fromIntVector.clear();
160+
toIntVector.clear();
161+
162+
// DenseUnionVector
163+
DenseUnionVector fromDuv = DenseUnionVector.empty("", allocator);
164+
populateDenseUnionVector(fromDuv, 100);
165+
transferPair = fromDuv.getTransferPair(allocator);
166+
DenseUnionVector toDUV = (DenseUnionVector) transferPair.getTo();
167+
transferPair.splitAndTransfer(0, 0);
168+
assertEquals(0, toDUV.getValueCount());
169+
170+
transferPair.splitAndTransfer(50, 0);
171+
assertEquals(0, toDUV.getValueCount());
172+
173+
transferPair.splitAndTransfer(100, 0);
174+
assertEquals(0, toDUV.getValueCount());
175+
fromDuv.clear();
176+
toDUV.clear();
177+
}
178+
69179
@Test /* VarCharVector */
70180
public void test() throws Exception {
71181
try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
72182
varCharVector.allocateNew(10000, 1000);
73183

74184
final int valueCount = 500;
75185
final String[] compareArray = new String[valueCount];
76-
186+
77187
populateVarcharVector(varCharVector, valueCount, compareArray);
78-
188+
79189
final TransferPair tp = varCharVector.getTransferPair(allocator);
80190
final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
81191
final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};
82-
192+
83193
for (final int[] startLength : startLengths) {
84194
final int start = startLength[0];
85195
final int length = startLength[1];
@@ -429,5 +539,4 @@ public void testMapVectorZeroStartIndexAndLength() {
429539
newMapVector.clear();
430540
}
431541
}
432-
433542
}

0 commit comments

Comments
 (0)