Skip to content

Commit ebfd863

Browse files
uschindler and jpountz authored
Rewrite of the GroupVInt optimization without lambdas, varhandles and no code in subclasses (#15116)
* Rewrite of the GroupVInt optimization without lambdas, varhandles and no code in subclasses * Remove optimized flag and duplicate code * apply suggestions/fixes: fix maximum size; cleanup baseline method and hide internal details * Apply suggestions from code review Co-authored-by: Adrien Grand <[email protected]> --------- Co-authored-by: Adrien Grand <[email protected]>
1 parent b6e57da commit ebfd863

File tree

7 files changed

+65
-140
lines changed

7 files changed

+65
-140
lines changed

lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@
2323
import java.util.concurrent.TimeUnit;
2424
import org.apache.lucene.store.ByteArrayDataInput;
2525
import org.apache.lucene.store.ByteArrayDataOutput;
26-
import org.apache.lucene.store.ByteBuffersDataInput;
2726
import org.apache.lucene.store.ByteBuffersDataOutput;
28-
import org.apache.lucene.store.DataInput;
27+
import org.apache.lucene.store.ByteBuffersDirectory;
2928
import org.apache.lucene.store.Directory;
3029
import org.apache.lucene.store.IOContext;
3130
import org.apache.lucene.store.IndexInput;
@@ -50,8 +49,8 @@
5049
@BenchmarkMode(Mode.Throughput)
5150
@OutputTimeUnit(TimeUnit.MICROSECONDS)
5251
@State(Scope.Benchmark)
53-
@Warmup(iterations = 3, time = 3)
54-
@Measurement(iterations = 5, time = 5)
52+
@Warmup(iterations = 4, time = 8)
53+
@Measurement(iterations = 5, time = 20)
5554
@Fork(
5655
value = 1,
5756
jvmArgsPrepend = {"--add-modules=jdk.unsupported"})
@@ -92,7 +91,7 @@ public class GroupVIntBenchmark {
9291
IndexInput mmapGVIntIn;
9392
IndexInput nioGVIntIn;
9493
IndexInput mmapVIntIn;
95-
ByteBuffersDataInput byteBuffersGVIntIn;
94+
IndexInput byteBuffersGVIntIn;
9695

9796
ByteArrayDataInput byteArrayVIntIn;
9897
ByteArrayDataInput byteArrayGVIntIn;
@@ -125,9 +124,11 @@ void initNioInput(int[] docs) throws Exception {
125124
}
126125

127126
void initByteBuffersInput(int[] docs) throws Exception {
128-
ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
129-
buffer.writeGroupVInts(docs, docs.length);
130-
byteBuffersGVIntIn = buffer.toDataInput();
127+
Directory dir = new ByteBuffersDirectory();
128+
IndexOutput out = dir.createOutput("gvint", IOContext.DEFAULT);
129+
out.writeGroupVInts(docs, docs.length);
130+
out.close();
131+
byteBuffersGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
131132
}
132133

133134
void initMMapInput(int[] docs) throws Exception {
@@ -145,16 +146,6 @@ void initMMapInput(int[] docs) throws Exception {
145146
mmapVIntIn = dir.openInput("vint", IOContext.DEFAULT);
146147
}
147148

148-
private void readGroupVIntsBaseline(DataInput in, int[] dst, int limit) throws IOException {
149-
int i;
150-
for (i = 0; i <= limit - 4; i += 4) {
151-
GroupVIntUtil.readGroupVInt(in, dst, i);
152-
}
153-
for (; i < limit; ++i) {
154-
dst[i] = in.readVInt();
155-
}
156-
}
157-
158149
@Setup(Level.Trial)
159150
public void init() throws Exception {
160151
Random r = new Random(0);
@@ -193,7 +184,7 @@ public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOExcept
193184
@Benchmark
194185
public void benchMMapDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
195186
mmapGVIntIn.seek(0);
196-
this.readGroupVIntsBaseline(mmapGVIntIn, values, size);
187+
GroupVIntUtil.readGroupVInts$Baseline(mmapGVIntIn, values, size);
197188
bh.consume(values);
198189
}
199190

@@ -223,7 +214,7 @@ public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOExcep
223214
@Benchmark
224215
public void benchNIOFSDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
225216
nioGVIntIn.seek(0);
226-
this.readGroupVIntsBaseline(nioGVIntIn, values, size);
217+
GroupVIntUtil.readGroupVInts$Baseline(nioGVIntIn, values, size);
227218
bh.consume(values);
228219
}
229220

@@ -237,7 +228,7 @@ public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOExce
237228
@Benchmark
238229
public void benchByteBuffersIndexInput_readGroupVIntBaseline(Blackhole bh) throws IOException {
239230
byteBuffersGVIntIn.seek(0);
240-
this.readGroupVIntsBaseline(byteBuffersGVIntIn, values, size);
231+
GroupVIntUtil.readGroupVInts$Baseline(byteBuffersGVIntIn, values, size);
241232
bh.consume(values);
242233
}
243234

lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import java.io.IOException;
2121
import java.nio.ByteBuffer;
2222
import java.nio.ByteOrder;
23-
import org.apache.lucene.util.GroupVIntUtil;
2423

2524
/** Base implementation class for buffered {@link IndexInput}. */
2625
public abstract class BufferedIndexInput extends IndexInput implements RandomAccessInput {
@@ -150,16 +149,6 @@ public final int readInt() throws IOException {
150149
}
151150
}
152151

153-
@Override
154-
public void readGroupVInt(int[] dst, int offset) throws IOException {
155-
final int len =
156-
GroupVIntUtil.readGroupVInt(
157-
this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset);
158-
if (len > 0) {
159-
buffer.position(buffer.position() + len);
160-
}
161-
}
162-
163152
@Override
164153
public final long readLong() throws IOException {
165154
if (Long.BYTES <= buffer.remaining()) {

lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import java.util.Locale;
3030
import java.util.stream.Collectors;
3131
import org.apache.lucene.util.Accountable;
32-
import org.apache.lucene.util.GroupVIntUtil;
3332
import org.apache.lucene.util.RamUsageEstimator;
3433

3534
/**
@@ -203,25 +202,6 @@ public long readLong() throws IOException {
203202
}
204203
}
205204

206-
@Override
207-
public void readGroupVInt(int[] dst, int offset) throws IOException {
208-
final ByteBuffer block = blocks[blockIndex(pos)];
209-
final int blockOffset = blockOffset(pos);
210-
// We MUST save the return value to local variable, could not use pos += readGroupVInt(...).
211-
// because `pos +=` in java will move current value(not address) of pos to register first,
212-
// then call the function, but we will update pos value in function via readByte(), then
213-
// `pos +=` will use an old pos value plus return value, thereby missing 1 byte.
214-
final int len =
215-
GroupVIntUtil.readGroupVInt(
216-
this,
217-
block.limit() - blockOffset,
218-
p -> block.getInt((int) p),
219-
blockOffset,
220-
dst,
221-
offset);
222-
pos += len;
223-
}
224-
225205
@Override
226206
public long length() {
227207
return length;

lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,12 +205,6 @@ public void readLongs(long[] dst, int offset, int length) throws IOException {
205205
in.readLongs(dst, offset, length);
206206
}
207207

208-
@Override
209-
public void readGroupVInt(int[] dst, int offset) throws IOException {
210-
ensureOpen();
211-
in.readGroupVInt(dst, offset);
212-
}
213-
214208
@Override
215209
public IndexInput clone() {
216210
ensureOpen();

lucene/core/src/java/org/apache/lucene/store/DataInput.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import java.util.TreeMap;
2828
import java.util.TreeSet;
2929
import org.apache.lucene.util.BitUtil;
30-
import org.apache.lucene.util.GroupVIntUtil;
3130

3231
/**
3332
* Abstract base class for performing read operations of Lucene's low-level data types.
@@ -99,16 +98,6 @@ public int readInt() throws IOException {
9998
return ((b4 & 0xFF) << 24) | ((b3 & 0xFF) << 16) | ((b2 & 0xFF) << 8) | (b1 & 0xFF);
10099
}
101100

102-
/**
103-
* Override if you have an efficient implementation. In general this is when the input supports
104-
* random access.
105-
*
106-
* @lucene.experimental
107-
*/
108-
public void readGroupVInt(int[] dst, int offset) throws IOException {
109-
GroupVIntUtil.readGroupVInt(this, dst, offset);
110-
}
111-
112101
/**
113102
* Reads an int stored in variable-length format. Reads between one and five bytes. Smaller values
114103
* take fewer bytes. Negative numbers are supported, but should be avoided.

lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.apache.lucene.util.ArrayUtil;
3232
import org.apache.lucene.util.BitUtil;
3333
import org.apache.lucene.util.Constants;
34-
import org.apache.lucene.util.GroupVIntUtil;
3534
import org.apache.lucene.util.IOConsumer;
3635

3736
/**
@@ -429,23 +428,6 @@ public byte readByte(long pos) throws IOException {
429428
}
430429
}
431430

432-
@Override
433-
public void readGroupVInt(int[] dst, int offset) throws IOException {
434-
try {
435-
final int len =
436-
GroupVIntUtil.readGroupVInt(
437-
this,
438-
curSegment.byteSize() - curPosition,
439-
p -> curSegment.get(LAYOUT_LE_INT, p),
440-
curPosition,
441-
dst,
442-
offset);
443-
curPosition += len;
444-
} catch (NullPointerException | IllegalStateException e) {
445-
throw alreadyClosed(e);
446-
}
447-
}
448-
449431
@Override
450432
public void readBytes(long pos, byte[] b, int offset, int len) throws IOException {
451433
try {

lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,17 @@
1919
import java.io.IOException;
2020
import org.apache.lucene.store.DataInput;
2121
import org.apache.lucene.store.DataOutput;
22+
import org.apache.lucene.store.IndexInput;
23+
import org.apache.lucene.store.RandomAccessInput;
2224

2325
/**
2426
* This class contains utility methods and constants for group varint
2527
*
2628
* @lucene.internal
2729
*/
2830
public final class GroupVIntUtil {
29-
// the maximum length of a single group-varint is 4 integers + 1 byte flag.
30-
public static final int MAX_LENGTH_PER_GROUP = 17;
31+
// the maximum length of a single group-varint is 1 byte flag and 4 integers.
32+
public static final int MAX_LENGTH_PER_GROUP = Byte.BYTES + 4 * Integer.BYTES;
3133

3234
private static final int[] INT_MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, ~0};
3335

@@ -41,7 +43,7 @@ public final class GroupVIntUtil {
4143
public static void readGroupVInts(DataInput in, int[] dst, int limit) throws IOException {
4244
int i;
4345
for (i = 0; i <= limit - 4; i += 4) {
44-
in.readGroupVInt(dst, i);
46+
readGroupVInt(in, dst, i);
4547
}
4648
for (; i < limit; ++i) {
4749
dst[i] = in.readVInt();
@@ -56,7 +58,54 @@ public static void readGroupVInts(DataInput in, int[] dst, int limit) throws IOE
5658
* @param dst the array to read ints into.
5759
* @param offset the offset in the array to start storing ints.
5860
*/
59-
public static void readGroupVInt(DataInput in, int[] dst, int offset) throws IOException {
61+
private static void readGroupVInt(DataInput in, int[] dst, int offset) throws IOException {
62+
final int flag = in.readByte() & 0xFF;
63+
64+
final int n1Minus1 = flag >> 6;
65+
final int n2Minus1 = (flag >> 4) & 0x03;
66+
final int n3Minus1 = (flag >> 2) & 0x03;
67+
final int n4Minus1 = flag & 0x03;
68+
69+
// if our DataInput implements RandomAccessInput for absolute access and IndexInput for seeking,
70+
// we use a branch-less implementation:
71+
if (in instanceof RandomAccessInput rin && in instanceof IndexInput iin) {
72+
long pos = iin.getFilePointer();
73+
if (iin.length() - pos >= 4 * Integer.BYTES) {
74+
dst[offset] = rin.readInt(pos) & INT_MASKS[n1Minus1];
75+
pos += 1 + n1Minus1;
76+
dst[offset + 1] = rin.readInt(pos) & INT_MASKS[n2Minus1];
77+
pos += 1 + n2Minus1;
78+
dst[offset + 2] = rin.readInt(pos) & INT_MASKS[n3Minus1];
79+
pos += 1 + n3Minus1;
80+
dst[offset + 3] = rin.readInt(pos) & INT_MASKS[n4Minus1];
81+
pos += 1 + n4Minus1;
82+
83+
iin.seek(pos);
84+
return;
85+
}
86+
}
87+
88+
// fall-through: default impl
89+
dst[offset] = readIntInGroup(in, n1Minus1);
90+
dst[offset + 1] = readIntInGroup(in, n2Minus1);
91+
dst[offset + 2] = readIntInGroup(in, n3Minus1);
92+
dst[offset + 3] = readIntInGroup(in, n4Minus1);
93+
}
94+
95+
/** DO not use! Only visible for benchmarking purposes! */
96+
public static void readGroupVInts$Baseline(DataInput in, int[] dst, int limit)
97+
throws IOException {
98+
int i;
99+
for (i = 0; i <= limit - 4; i += 4) {
100+
readGroupVInt$Baseline(in, dst, i);
101+
}
102+
for (; i < limit; ++i) {
103+
dst[i] = in.readVInt();
104+
}
105+
}
106+
107+
private static void readGroupVInt$Baseline(DataInput in, int[] dst, int offset)
108+
throws IOException {
60109
final int flag = in.readByte() & 0xFF;
61110

62111
final int n1Minus1 = flag >> 6;
@@ -83,55 +132,6 @@ private static int readIntInGroup(DataInput in, int numBytesMinus1) throws IOExc
83132
}
84133
}
85134

86-
/**
87-
* Provides an abstraction for read int values, so that decoding logic can be reused in different
88-
* DataInput.
89-
*/
90-
@FunctionalInterface
91-
public static interface IntReader {
92-
int read(long v);
93-
}
94-
95-
/**
96-
* Faster implementation of read single group, It read values from the buffer that would not cross
97-
* boundaries.
98-
*
99-
* @param in the input to use to read data.
100-
* @param remaining the number of remaining bytes allowed to read for current block/segment.
101-
* @param reader the supplier of read int.
102-
* @param pos the start pos to read from the reader.
103-
* @param dst the array to read ints into.
104-
* @param offset the offset in the array to start storing ints.
105-
* @return the number of bytes read excluding the flag. this indicates the number of positions
106-
* should to be increased for caller, it is 0 or positive number and less than {@link
107-
* #MAX_LENGTH_PER_GROUP}
108-
*/
109-
public static int readGroupVInt(
110-
DataInput in, long remaining, IntReader reader, long pos, int[] dst, int offset)
111-
throws IOException {
112-
if (remaining < MAX_LENGTH_PER_GROUP) {
113-
readGroupVInt(in, dst, offset);
114-
return 0;
115-
}
116-
final int flag = in.readByte() & 0xFF;
117-
final long posStart = ++pos; // exclude the flag bytes, the position has updated via readByte().
118-
final int n1Minus1 = flag >> 6;
119-
final int n2Minus1 = (flag >> 4) & 0x03;
120-
final int n3Minus1 = (flag >> 2) & 0x03;
121-
final int n4Minus1 = flag & 0x03;
122-
123-
// This code path has fewer conditionals and tends to be significantly faster in benchmarks
124-
dst[offset] = reader.read(pos) & INT_MASKS[n1Minus1];
125-
pos += 1 + n1Minus1;
126-
dst[offset + 1] = reader.read(pos) & INT_MASKS[n2Minus1];
127-
pos += 1 + n2Minus1;
128-
dst[offset + 2] = reader.read(pos) & INT_MASKS[n3Minus1];
129-
pos += 1 + n3Minus1;
130-
dst[offset + 3] = reader.read(pos) & INT_MASKS[n4Minus1];
131-
pos += 1 + n4Minus1;
132-
return (int) (pos - posStart);
133-
}
134-
135135
private static int numBytes(int v) {
136136
// | 1 to return 1 when v = 0
137137
return Integer.BYTES - (Integer.numberOfLeadingZeros(v | 1) >> 3);

0 commit comments

Comments
 (0)