Skip to content

Commit 52f7a86

Browse files
authored
GH-816: Presize JsonStringArrayList vector results (#817)
## What's Changed

Presize `JsonStringArrayList`s when constructing them as part of `ValueVector#getObject` conversions. `FixedSizeListVector#getObject` already performs this optimization; however, `ListVector`, `ListViewVector`, `LargeListVector`, and `LargeListViewVector` do not yet presize the result `JsonStringArrayList`, which forces dynamic reallocations as elements are converted and added. This can become a scalability bottleneck when using these types.

Closes #816.
1 parent 65caf50 commit 52f7a86

File tree

6 files changed

+15
-10
lines changed

6 files changed

+15
-10
lines changed

adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,8 +1071,8 @@ private static FieldType createFieldType(
10711071
}
10721072

10731073
private static String convertAliases(Set<String> aliases) {
1074-
JsonStringArrayList jsonList = new JsonStringArrayList();
1075-
aliases.stream().forEach(a -> jsonList.add(a));
1074+
JsonStringArrayList jsonList = new JsonStringArrayList(aliases.size());
1075+
jsonList.addAll(aliases);
10761076
return jsonList.toString();
10771077
}
10781078
}

adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
package org.apache.arrow.adapter.avro;
1818

1919
import java.io.EOFException;
20-
import java.util.ArrayList;
2120
import java.util.Iterator;
2221
import java.util.List;
2322
import java.util.stream.Collectors;
2423
import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer;
24+
import org.apache.arrow.adapter.avro.consumers.Consumer;
2525
import org.apache.arrow.util.Preconditions;
2626
import org.apache.arrow.vector.FieldVector;
27+
import org.apache.arrow.vector.ValueVector;
2728
import org.apache.arrow.vector.VectorSchemaRoot;
2829
import org.apache.arrow.vector.types.pojo.Field;
2930
import org.apache.arrow.vector.util.ValueVectorUtility;
@@ -75,9 +76,11 @@ public static AvroToArrowVectorIterator create(
7576
private void initialize() {
7677
// create consumers
7778
compositeConsumer = AvroToArrowUtils.createCompositeConsumer(schema, config);
78-
List<FieldVector> vectors = new ArrayList<>();
79-
compositeConsumer.getConsumers().forEach(c -> vectors.add(c.getVector()));
80-
List<Field> fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList());
79+
List<FieldVector> vectors =
80+
compositeConsumer.getConsumers().stream()
81+
.map(Consumer::getVector)
82+
.collect(Collectors.toList());
83+
List<Field> fields = vectors.stream().map(ValueVector::getField).collect(Collectors.toList());
8184
VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0);
8285
rootSchema = root.getSchema();
8386

vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.arrow.memory.util.ArrowBufPointer;
3232
import org.apache.arrow.memory.util.ByteFunctionHelpers;
3333
import org.apache.arrow.memory.util.CommonUtil;
34+
import org.apache.arrow.memory.util.LargeMemoryUtil;
3435
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
3536
import org.apache.arrow.util.Preconditions;
3637
import org.apache.arrow.vector.AddOrGetResult;
@@ -861,10 +862,11 @@ public List<?> getObject(int index) {
861862
if (isSet(index) == 0) {
862863
return null;
863864
}
864-
final List<Object> vals = new JsonStringArrayList<>();
865865
final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
866866
final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
867867
final ValueVector vv = getDataVector();
868+
final List<Object> vals =
869+
new JsonStringArrayList<>(LargeMemoryUtil.checkedCastToInt(end - start));
868870
for (long i = start; i < end; i++) {
869871
vals.add(vv.getObject(checkedCastToInt(i)));
870872
}

vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,10 +672,10 @@ public List<?> getObject(int index) {
672672
if (isSet(index) == 0) {
673673
return null;
674674
}
675-
final List<Object> vals = new JsonStringArrayList<>();
676675
final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
677676
final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH);
678677
final ValueVector vv = getDataVector();
678+
final List<Object> vals = new JsonStringArrayList<>(end - start);
679679
for (int i = start; i < end; i++) {
680680
vals.add(vv.getObject(checkedCastToInt(i)));
681681
}

vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -719,10 +719,10 @@ public List<?> getObject(int index) {
719719
if (isSet(index) == 0) {
720720
return null;
721721
}
722-
final List<Object> vals = new JsonStringArrayList<>();
723722
final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
724723
final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
725724
final ValueVector vv = getDataVector();
725+
final List<Object> vals = new JsonStringArrayList<>(end - start);
726726
for (int i = start; i < end; i++) {
727727
vals.add(vv.getObject(i));
728728
}

vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,10 +678,10 @@ public List<?> getObject(int index) {
678678
if (isSet(index) == 0) {
679679
return null;
680680
}
681-
final List<Object> vals = new JsonStringArrayList<>();
682681
final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
683682
final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH);
684683
final ValueVector vv = getDataVector();
684+
final List<Object> vals = new JsonStringArrayList<>(end - start);
685685
for (int i = start; i < end; i++) {
686686
vals.add(vv.getObject(i));
687687
}

0 commit comments

Comments
 (0)