Skip to content

Commit f61afc4

Browse files
committed
Improve VariableWidthBlock encoding
Writes ending offsets directly instead of converting between offsets and lengths when serializing and deserializing VariableWidthBlock instances. This enables a fast path for blocks without nulls present. When nulls are present, compacting and expanding the offsets still outperforms the previous length-to-offset conversion.
1 parent 1ab232f commit f61afc4

File tree

2 files changed

+69
-65
lines changed

2 files changed

+69
-65
lines changed

core/trino-main/src/test/java/io/trino/execution/buffer/TestPagesSerde.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,13 @@ public void testVarcharSerializedSize()
218218
pageSize = 44; // Now we have moved to the normal block implementation so the page size overhead is 44
219219
page = new Page(builder.build());
220220
int firstValueSize = serializedSize(ImmutableList.of(VARCHAR), page) - pageSize;
221-
assertThat(firstValueSize).isEqualTo(8 + 5); // length + nonNullsCount + "alice"
221+
assertThat(firstValueSize).isEqualTo(8 + 5); // ending offsets + nonNullsCount + "alice"
222222

223223
// page with two values
224224
VARCHAR.writeString(builder, "bob");
225225
page = new Page(builder.build());
226226
int secondValueSize = serializedSize(ImmutableList.of(VARCHAR), page) - (pageSize + firstValueSize);
227-
assertThat(secondValueSize).isEqualTo(4 + 3); // length + "bob" (null shared with first entry)
227+
assertThat(secondValueSize).isEqualTo(4 + 3); // one additional ending offset + "bob" (null shared with first entry)
228228
}
229229

230230
private int serializedSize(List<? extends Type> types, Page expectedPage)

core/trino-spi/src/main/java/io/trino/spi/block/VariableWidthBlockEncoding.java

Lines changed: 67 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
import io.airlift.slice.Slices;
2020
import jakarta.annotation.Nullable;
2121

22-
import java.util.Arrays;
23-
2422
import static io.trino.spi.block.EncoderUtil.decodeNullBits;
2523
import static io.trino.spi.block.EncoderUtil.encodeNullsAsBits;
2624
import static java.lang.String.format;
@@ -54,61 +52,26 @@ public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceO
5452
int arrayBaseOffset = variableWidthBlock.getRawArrayBase();
5553
@Nullable
5654
boolean[] isNull = variableWidthBlock.getRawValueIsNull();
57-
int[] rawOffsets = variableWidthBlock.getRawOffsets();
58-
checkFromIndexSize(arrayBaseOffset, positionCount + 1, rawOffsets.length);
59-
60-
// lengths
61-
int[] lengths = new int[positionCount];
62-
int totalLength = 0;
63-
int nonNullsCount = 0;
64-
65-
for (int position = 0; position < positionCount; position++) {
66-
int length = rawOffsets[position + arrayBaseOffset + 1] - rawOffsets[position + arrayBaseOffset];
67-
totalLength += length;
68-
lengths[nonNullsCount] = length;
69-
nonNullsCount += isNull != null && isNull[position + arrayBaseOffset] ? 0 : 1;
70-
}
55+
encodeNullsAsBits(sliceOutput, isNull, arrayBaseOffset, positionCount);
7156

72-
sliceOutput
73-
.appendInt(nonNullsCount)
74-
.writeInts(lengths, 0, nonNullsCount);
57+
int[] rawOffsets = variableWidthBlock.getRawOffsets();
58+
writeOffsetsWithNullsCompacted(sliceOutput, rawOffsets, isNull, arrayBaseOffset, positionCount);
7559

76-
encodeNullsAsBits(sliceOutput, isNull, arrayBaseOffset, positionCount);
60+
int startingOffset = rawOffsets[arrayBaseOffset];
61+
int totalLength = rawOffsets[positionCount + arrayBaseOffset] - startingOffset;
7762

7863
sliceOutput
7964
.appendInt(totalLength)
80-
.writeBytes(variableWidthBlock.getRawSlice(), variableWidthBlock.getPositionOffset(0), totalLength);
65+
.writeBytes(variableWidthBlock.getRawSlice(), startingOffset, totalLength);
8166
}
8267

8368
@Override
8469
public Block readBlock(BlockEncodingSerde blockEncodingSerde, SliceInput sliceInput)
8570
{
8671
int positionCount = sliceInput.readInt();
87-
int nonNullsCount = sliceInput.readInt();
88-
89-
if (nonNullsCount > positionCount) {
90-
throw new IllegalArgumentException(format("nonNullsCount must be <= positionCount, found: %s > %s", nonNullsCount, positionCount));
91-
}
92-
93-
int[] offsets = new int[positionCount + 1];
94-
// Read the lengths array into the end of the offsets array, since nonNullsCount <= positionCount
95-
int lengthIndex = offsets.length - nonNullsCount;
96-
sliceInput.readInts(offsets, lengthIndex, nonNullsCount);
97-
9872
boolean[] valueIsNull = decodeNullBits(sliceInput, positionCount).orElse(null);
99-
// Transform lengths back to offsets
100-
if (valueIsNull == null) {
101-
if (positionCount != nonNullsCount || lengthIndex != 1) {
102-
throw new IllegalArgumentException(format("nonNullsCount must equal positionCount, found: %s <> %s", nonNullsCount, positionCount));
103-
}
104-
// Simplified loop for no nulls present
105-
for (int i = 1; i < offsets.length; i++) {
106-
offsets[i] += offsets[i - 1];
107-
}
108-
}
109-
else {
110-
computeOffsetsFromLengths(offsets, valueIsNull, lengthIndex);
111-
}
73+
74+
int[] offsets = readOffsetsWithNullsCompacted(sliceInput, valueIsNull, positionCount);
11275

11376
int blockSize = sliceInput.readInt();
11477
Slice slice = Slices.allocate(blockSize);
@@ -117,28 +80,69 @@ public Block readBlock(BlockEncodingSerde blockEncodingSerde, SliceInput sliceIn
11780
return new VariableWidthBlock(0, positionCount, slice, offsets, valueIsNull);
11881
}
11982

120-
private static void computeOffsetsFromLengths(int[] offsets, boolean[] valueIsNull, int lengthIndex)
83+
private static void writeOffsetsWithNullsCompacted(SliceOutput sliceOutput, int[] rawOffsets, @Nullable boolean[] valueIsNull, int baseOffset, int positionCount)
84+
{
85+
checkFromIndexSize(baseOffset, positionCount + 1, rawOffsets.length);
86+
87+
int startingOffset = rawOffsets[baseOffset];
88+
if (valueIsNull == null && startingOffset == 0) {
89+
// No translation of offsets required, write the range of raw offsets directly to the output
90+
sliceOutput
91+
.appendInt(positionCount)
92+
.writeInts(rawOffsets, baseOffset + 1, positionCount);
93+
}
94+
else {
95+
int[] nonNullOffsets;
96+
int nonNullOffsetsCount;
97+
if (valueIsNull == null) {
98+
// Subtract starting offset from each ending offset to translate them to start from zero, no null suppression required
99+
nonNullOffsets = new int[positionCount];
100+
for (int i = 0; i < nonNullOffsets.length; i++) {
101+
nonNullOffsets[i] = rawOffsets[i + baseOffset + 1] - startingOffset;
102+
}
103+
nonNullOffsetsCount = nonNullOffsets.length;
104+
}
105+
else {
106+
// Translate ending offsets and suppress null values from the output
107+
nonNullOffsets = new int[positionCount];
108+
nonNullOffsetsCount = 0;
109+
for (int i = 0; i < positionCount; i++) {
110+
nonNullOffsets[nonNullOffsetsCount] = rawOffsets[i + baseOffset + 1] - startingOffset;
111+
nonNullOffsetsCount += valueIsNull[i + baseOffset] ? 0 : 1;
112+
}
113+
}
114+
sliceOutput
115+
.appendInt(nonNullOffsetsCount)
116+
.writeInts(nonNullOffsets, 0, nonNullOffsetsCount);
117+
}
118+
}
119+
120+
private static int[] readOffsetsWithNullsCompacted(SliceInput sliceInput, @Nullable boolean[] valueIsNull, int positionCount)
121121
{
122-
if (lengthIndex < 0 || lengthIndex > offsets.length) {
123-
throw new IllegalArgumentException(format("Invalid lengthIndex %s for offsets %s", lengthIndex, offsets.length));
122+
if (valueIsNull != null && valueIsNull.length != positionCount) {
123+
throw new IllegalArgumentException(format("valueIsNull length must match positionCount, found %s <> %s", valueIsNull.length, positionCount));
124+
}
125+
int nonNullOffsetCount = sliceInput.readInt();
126+
if (nonNullOffsetCount > positionCount) {
127+
throw new IllegalArgumentException(format("nonNullOffsetCount must be <= positionCount, found: %s > %s", nonNullOffsetCount, positionCount));
124128
}
125-
int currentOffset = 0;
126-
for (int i = 1; i < offsets.length; i++) {
127-
if (lengthIndex == offsets.length) {
128-
// Populate remaining null elements
129-
Arrays.fill(offsets, i, offsets.length, currentOffset);
130-
break;
129+
// Offsets are read into the end of the array; expansion then shifts values down into the lower range, repeating the previous offset at each null position
130+
int[] offsets = new int[positionCount + 1];
131+
int compactIndex = offsets.length - nonNullOffsetCount;
132+
sliceInput.readInts(offsets, compactIndex, nonNullOffsetCount);
133+
if (valueIsNull == null || compactIndex == 1) {
134+
if (positionCount != nonNullOffsetCount) {
135+
throw new IllegalArgumentException(format("nonNullOffsetCount must match positionCount, found %s <> %s", nonNullOffsetCount, positionCount));
131136
}
132-
boolean isNull = valueIsNull[i - 1];
133-
// must be accessed unconditionally, otherwise CMOV optimization isn't applied due to
134-
// ArrayIndexOutOfBoundsException checks
135-
int length = offsets[lengthIndex];
136-
lengthIndex += isNull ? 0 : 1;
137-
currentOffset += isNull ? 0 : length;
138-
offsets[i] = currentOffset;
137+
return offsets;
139138
}
140-
if (lengthIndex != offsets.length) {
141-
throw new IllegalArgumentException(format("Failed to consume all length entries, found %s <> %s", lengthIndex, offsets.length));
139+
// Shift the offsets from the end of the offsets array downwards, repeating the previous offset when nulls are encountered
140+
// until no more nulls are present
141+
int readFrom = compactIndex - 1;
142+
for (int position = 0; position < readFrom; position++) {
143+
offsets[position] = offsets[readFrom];
144+
readFrom += valueIsNull[position] ? 0 : 1;
142145
}
146+
return offsets;
143147
}
144148
}

0 commit comments

Comments
 (0)