Skip to content

Commit 013881e

Browse files
DX-103340: Allow using 1GB+ data buffers in variable width vectors
Backport of apache/arrow-java#722
1 parent 0bc7698 commit 013881e

File tree

5 files changed

+48
-25
lines changed

5 files changed

+48
-25
lines changed

java/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,8 @@
478478
<io.netty.tryReflectionSetAccessible>true</io.netty.tryReflectionSetAccessible>
479479
<user.timezone>UTC</user.timezone>
480480
<!-- Note: changing the below configuration might increase the max allocation size for a vector
481-
which in turn can cause OOM. -->
482-
<arrow.vector.max_allocation_bytes>1048576</arrow.vector.max_allocation_bytes>
481+
which in turn can cause OOM. Using 2MB - 1 byte to simulate the default limit of 2^31 - 1 bytes. -->
482+
<arrow.vector.max_allocation_bytes>2097151</arrow.vector.max_allocation_bytes>
483483
</systemPropertyVariables>
484484
</configuration>
485485
</plugin>

java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.arrow.vector;
1919

20-
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
21-
2220
import java.nio.ByteBuffer;
2321
import java.util.ArrayList;
2422
import java.util.Collections;
@@ -30,6 +28,7 @@
3028
import org.apache.arrow.memory.util.ArrowBufPointer;
3129
import org.apache.arrow.memory.util.ByteFunctionHelpers;
3230
import org.apache.arrow.memory.util.CommonUtil;
31+
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
3332
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
3433
import org.apache.arrow.util.Preconditions;
3534
import org.apache.arrow.vector.compare.VectorVisitor;
@@ -568,10 +567,13 @@ public void reallocDataBuffer(long desiredAllocSize) {
568567
return;
569568
}
570569

571-
final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
570+
final long newAllocationSize =
571+
Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE);
572572
assert newAllocationSize >= 1;
573573

574-
checkDataBufferSize(newAllocationSize);
574+
if (newAllocationSize < desiredAllocSize) {
575+
checkDataBufferSize(desiredAllocSize);
576+
}
575577

576578
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
577579
newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity());

java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,6 @@
1717

1818
package org.apache.arrow.vector;
1919

20-
import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
21-
import static org.apache.arrow.vector.TestUtils.newVarCharVector;
22-
import static org.apache.arrow.vector.TestUtils.newVector;
23-
import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
24-
import static org.junit.Assert.assertArrayEquals;
25-
import static org.junit.Assert.assertEquals;
26-
import static org.junit.Assert.assertFalse;
27-
import static org.junit.Assert.assertNull;
28-
import static org.junit.Assert.assertSame;
29-
import static org.junit.Assert.assertTrue;
30-
3120
import java.nio.ByteBuffer;
3221
import java.nio.charset.Charset;
3322
import java.nio.charset.StandardCharsets;
@@ -42,6 +31,9 @@
4231
import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
4332
import org.apache.arrow.memory.util.ArrowBufPointer;
4433
import org.apache.arrow.memory.util.CommonUtil;
34+
import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
35+
import static org.apache.arrow.vector.TestUtils.newVarCharVector;
36+
import static org.apache.arrow.vector.TestUtils.newVector;
4537
import org.apache.arrow.vector.compare.Range;
4638
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
4739
import org.apache.arrow.vector.compare.VectorEqualsVisitor;
@@ -58,6 +50,7 @@
5850
import org.apache.arrow.vector.holders.NullableVarCharHolder;
5951
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
6052
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
53+
import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
6154
import org.apache.arrow.vector.types.Types;
6255
import org.apache.arrow.vector.types.Types.MinorType;
6356
import org.apache.arrow.vector.types.UnionMode;
@@ -70,6 +63,12 @@
7063
import org.apache.arrow.vector.util.Text;
7164
import org.apache.arrow.vector.util.TransferPair;
7265
import org.junit.After;
66+
import static org.junit.Assert.assertArrayEquals;
67+
import static org.junit.Assert.assertEquals;
68+
import static org.junit.Assert.assertFalse;
69+
import static org.junit.Assert.assertNull;
70+
import static org.junit.Assert.assertSame;
71+
import static org.junit.Assert.assertTrue;
7372
import org.junit.Before;
7473
import org.junit.Test;
7574

@@ -92,7 +91,7 @@ public void init() {
9291
private static final byte[] STR5 = "EEE5".getBytes(utf8Charset);
9392
private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset);
9493
private static final int MAX_VALUE_COUNT =
95-
(int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7);
94+
(int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 9);
9695
private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2);
9796

9897
@After

java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,11 @@
1717

1818
package org.apache.arrow.vector;
1919

20-
import static org.junit.Assert.*;
21-
2220
import java.nio.charset.StandardCharsets;
2321

2422
import org.apache.arrow.memory.BufferAllocator;
2523
import org.apache.arrow.memory.RootAllocator;
24+
import org.apache.arrow.memory.util.CommonUtil;
2625
import org.apache.arrow.vector.complex.DenseUnionVector;
2726
import org.apache.arrow.vector.complex.FixedSizeListVector;
2827
import org.apache.arrow.vector.complex.ListVector;
@@ -39,6 +38,9 @@
3938
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
4039
import org.junit.After;
4140
import org.junit.Assert;
41+
import static org.junit.Assert.assertEquals;
42+
import static org.junit.Assert.assertNull;
43+
import static org.junit.Assert.assertTrue;
4244
import org.junit.Before;
4345
import org.junit.Test;
4446

@@ -223,6 +225,17 @@ public void testVariableAllocateAfterReAlloc() throws Exception {
223225
}
224226
}
225227

228+
@Test
229+
public void testVariableReAllocAbove1GB() throws Exception {
230+
try (final VarCharVector vector = new VarCharVector("", allocator)) {
231+
long desiredSizeAboveLastPowerOf2 =
232+
CommonUtil.nextPowerOfTwo(BaseVariableWidthVector.MAX_ALLOCATION_SIZE) / 2 + 1;
233+
vector.reallocDataBuffer(desiredSizeAboveLastPowerOf2);
234+
235+
assertTrue(vector.getDataBuffer().capacity() >= desiredSizeAboveLastPowerOf2);
236+
}
237+
}
238+
226239
@Test
227240
public void testLargeVariableAllocateAfterReAlloc() throws Exception {
228241
try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {

java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,14 @@
1717

1818
package org.apache.arrow.vector.util;
1919

20-
import static junit.framework.TestCase.assertEquals;
21-
import static junit.framework.TestCase.assertTrue;
22-
import static org.junit.jupiter.api.Assertions.assertThrows;
23-
2420
import java.nio.charset.StandardCharsets;
2521
import java.util.Arrays;
2622
import java.util.Collections;
2723
import java.util.List;
2824

2925
import org.apache.arrow.memory.BufferAllocator;
3026
import org.apache.arrow.memory.RootAllocator;
27+
import org.apache.arrow.memory.util.CommonUtil;
3128
import org.apache.arrow.vector.BaseValueVector;
3229
import org.apache.arrow.vector.BigIntVector;
3330
import org.apache.arrow.vector.BitVector;
@@ -56,6 +53,10 @@
5653
import org.junit.After;
5754
import org.junit.Before;
5855
import org.junit.Test;
56+
import static org.junit.jupiter.api.Assertions.assertThrows;
57+
58+
import static junit.framework.TestCase.assertEquals;
59+
import static junit.framework.TestCase.assertTrue;
5960

6061
/**
6162
* Test cases for {@link VectorAppender}.
@@ -191,7 +192,15 @@ public void testAppendEmptyVariableWidthVector() {
191192

192193
@Test
193194
public void testAppendLargeAndSmallVariableVectorsWithinLimit() {
194-
int sixteenthOfMaxAllocation = Math.toIntExact(BaseValueVector.MAX_ALLOCATION_SIZE / 16);
195+
// Use the largest power-of-two allocation size so that rounding up does not hit the max limit
196+
long maxPowerOfTwoAllocationSize =
197+
CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE);
198+
if (maxPowerOfTwoAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
199+
maxPowerOfTwoAllocationSize =
200+
CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE / 2);
201+
}
202+
203+
int sixteenthOfMaxAllocation = Math.toIntExact(maxPowerOfTwoAllocationSize / 16);
195204
try (VarCharVector target = makeVarCharVec(1, sixteenthOfMaxAllocation);
196205
VarCharVector delta = makeVarCharVec(sixteenthOfMaxAllocation, 1)) {
197206
new VectorAppender(delta).visit(target, null);

0 commit comments

Comments
 (0)