Skip to content

Commit dc9f154

Browse files
authored
Move group-varint encoding/decoding logic to DataOutput/DataInput (apache#12841)
1 parent 9359a9d commit dc9f154

File tree

18 files changed

+451
-194
lines changed

18 files changed

+451
-194
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ Optimizations
120120

121121
* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)
122122

123+
* GITHUB#12841: Move group-varint encoding/decoding logic to DataOutput/DataInput. (Adrien Grand, Zhang Chao, Uwe Schindler)
124+
123125
Bug Fixes
124126
---------------------
125127

lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,18 @@
2121
import java.util.Arrays;
2222
import java.util.Random;
2323
import java.util.concurrent.TimeUnit;
24-
import org.apache.lucene.codecs.lucene99.GroupVIntReader;
25-
import org.apache.lucene.codecs.lucene99.GroupVIntWriter;
2624
import org.apache.lucene.store.ByteArrayDataInput;
2725
import org.apache.lucene.store.ByteArrayDataOutput;
26+
import org.apache.lucene.store.ByteBuffersDataInput;
27+
import org.apache.lucene.store.ByteBuffersDataOutput;
28+
import org.apache.lucene.store.DataInput;
2829
import org.apache.lucene.store.Directory;
2930
import org.apache.lucene.store.IOContext;
3031
import org.apache.lucene.store.IndexInput;
3132
import org.apache.lucene.store.IndexOutput;
3233
import org.apache.lucene.store.MMapDirectory;
34+
import org.apache.lucene.store.NIOFSDirectory;
35+
import org.apache.lucene.util.GroupVIntUtil;
3336
import org.openjdk.jmh.annotations.Benchmark;
3437
import org.openjdk.jmh.annotations.BenchmarkMode;
3538
import org.openjdk.jmh.annotations.Fork;
@@ -86,35 +89,49 @@ public class GroupVIntBenchmark {
8689
final long[] values = new long[maxSize];
8790

8891
IndexInput byteBufferGVIntIn;
92+
IndexInput nioGVIntIn;
8993
IndexInput byteBufferVIntIn;
94+
ByteBuffersDataInput byteBuffersGVIntIn;
9095

9196
ByteArrayDataInput byteArrayVIntIn;
9297
ByteArrayDataInput byteArrayGVIntIn;
9398

94-
// @Param({"16", "32", "64", "128", "248"})
9599
@Param({"64"})
96100
public int size;
97101

98102
void initArrayInput(long[] docs) throws Exception {
99103
byte[] gVIntBytes = new byte[Integer.BYTES * maxSize * 2];
100104
byte[] vIntBytes = new byte[Integer.BYTES * maxSize * 2];
101105
ByteArrayDataOutput vIntOut = new ByteArrayDataOutput(vIntBytes);
102-
GroupVIntWriter w = new GroupVIntWriter();
103-
w.writeValues(new ByteArrayDataOutput(gVIntBytes), docs, docs.length);
106+
ByteArrayDataOutput out = new ByteArrayDataOutput(gVIntBytes);
107+
out.writeGroupVInts(docs, docs.length);
104108
for (long v : docs) {
105109
vIntOut.writeVInt((int) v);
106110
}
107111
byteArrayVIntIn = new ByteArrayDataInput(vIntBytes);
108112
byteArrayGVIntIn = new ByteArrayDataInput(gVIntBytes);
109113
}
110114

115+
void initNioInput(long[] docs) throws Exception {
116+
Directory dir = new NIOFSDirectory(Files.createTempDirectory("groupvintdata"));
117+
IndexOutput out = dir.createOutput("gvint", IOContext.DEFAULT);
118+
out.writeGroupVInts(docs, docs.length);
119+
out.close();
120+
nioGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
121+
}
122+
123+
void initByteBuffersInput(long[] docs) throws Exception {
124+
ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
125+
buffer.writeGroupVInts(docs, docs.length);
126+
byteBuffersGVIntIn = buffer.toDataInput();
127+
}
128+
111129
void initByteBufferInput(long[] docs) throws Exception {
112-
Directory dir = MMapDirectory.open(Files.createTempDirectory("groupvintdata"));
130+
Directory dir = new MMapDirectory(Files.createTempDirectory("groupvintdata"));
113131
IndexOutput vintOut = dir.createOutput("vint", IOContext.DEFAULT);
114132
IndexOutput gvintOut = dir.createOutput("gvint", IOContext.DEFAULT);
115133

116-
GroupVIntWriter w = new GroupVIntWriter();
117-
w.writeValues(gvintOut, docs, docs.length);
134+
gvintOut.writeGroupVInts(docs, docs.length);
118135
for (long v : docs) {
119136
vintOut.writeVInt((int) v);
120137
}
@@ -124,6 +141,16 @@ void initByteBufferInput(long[] docs) throws Exception {
124141
byteBufferVIntIn = dir.openInput("vint", IOContext.DEFAULT);
125142
}
126143

144+
private void readGroupVIntsBaseline(DataInput in, long[] dst, int limit) throws IOException {
145+
int i;
146+
for (i = 0; i <= limit - 4; i += 4) {
147+
GroupVIntUtil.readGroupVInt(in, dst, i);
148+
}
149+
for (; i < limit; ++i) {
150+
dst[i] = in.readVInt();
151+
}
152+
}
153+
127154
@Setup(Level.Trial)
128155
public void init() throws Exception {
129156
long[] docs = new long[maxSize];
@@ -140,10 +167,12 @@ public void init() throws Exception {
140167
}
141168
initByteBufferInput(docs);
142169
initArrayInput(docs);
170+
initNioInput(docs);
171+
initByteBuffersInput(docs);
143172
}
144173

145174
@Benchmark
146-
public void byteBufferReadVInt(Blackhole bh) throws IOException {
175+
public void benchMMapDirectoryInputs_readVInt(Blackhole bh) throws IOException {
147176
byteBufferVIntIn.seek(0);
148177
for (int i = 0; i < size; i++) {
149178
values[i] = byteBufferVIntIn.readVInt();
@@ -152,14 +181,21 @@ public void byteBufferReadVInt(Blackhole bh) throws IOException {
152181
}
153182

154183
@Benchmark
155-
public void byteBufferReadGroupVInt(Blackhole bh) throws IOException {
184+
public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
156185
byteBufferGVIntIn.seek(0);
157-
GroupVIntReader.readValues(byteBufferGVIntIn, values, size);
186+
byteBufferGVIntIn.readGroupVInts(values, size);
158187
bh.consume(values);
159188
}
160189

161190
@Benchmark
162-
public void byteArrayReadVInt(Blackhole bh) {
191+
public void benchMMapDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
192+
byteBufferGVIntIn.seek(0);
193+
this.readGroupVIntsBaseline(byteBufferGVIntIn, values, size);
194+
bh.consume(values);
195+
}
196+
197+
@Benchmark
198+
public void benchByteArrayDataInput_readVInt(Blackhole bh) {
163199
byteArrayVIntIn.rewind();
164200
for (int i = 0; i < size; i++) {
165201
values[i] = byteArrayVIntIn.readVInt();
@@ -168,9 +204,37 @@ public void byteArrayReadVInt(Blackhole bh) {
168204
}
169205

170206
@Benchmark
171-
public void byteArrayReadGroupVInt(Blackhole bh) throws IOException {
207+
public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException {
172208
byteArrayGVIntIn.rewind();
173-
GroupVIntReader.readValues(byteArrayGVIntIn, values, size);
209+
byteArrayGVIntIn.readGroupVInts(values, size);
210+
bh.consume(values);
211+
}
212+
213+
@Benchmark
214+
public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
215+
nioGVIntIn.seek(0);
216+
nioGVIntIn.readGroupVInts(values, size);
217+
bh.consume(values);
218+
}
219+
220+
@Benchmark
221+
public void benchNIOFSDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
222+
nioGVIntIn.seek(0);
223+
this.readGroupVIntsBaseline(nioGVIntIn, values, size);
224+
bh.consume(values);
225+
}
226+
227+
@Benchmark
228+
public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException {
229+
byteBuffersGVIntIn.seek(0);
230+
byteBuffersGVIntIn.readGroupVInts(values, size);
231+
bh.consume(values);
232+
}
233+
234+
@Benchmark
235+
public void benchByteBuffersIndexInput_readGroupVIntBaseline(Blackhole bh) throws IOException {
236+
byteBuffersGVIntIn.seek(0);
237+
this.readGroupVIntsBaseline(byteBuffersGVIntIn, values, size);
174238
bh.consume(values);
175239
}
176240
}

lucene/core/src/java/org/apache/lucene/codecs/lucene99/GroupVIntReader.java

Lines changed: 0 additions & 57 deletions
This file was deleted.

lucene/core/src/java/org/apache/lucene/codecs/lucene99/GroupVIntWriter.java

Lines changed: 0 additions & 63 deletions
This file was deleted.

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ static void readVIntBlock(
149149
boolean indexHasFreq,
150150
boolean decodeFreq)
151151
throws IOException {
152-
GroupVIntReader.readValues(docIn, docBuffer, num);
152+
docIn.readGroupVInts(docBuffer, num);
153153
if (indexHasFreq && decodeFreq) {
154154
for (int i = 0; i < num; ++i) {
155155
freqBuffer[i] = docBuffer[i] & 0x01;

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
9292
private final PForUtil pforUtil;
9393
private final ForDeltaUtil forDeltaUtil;
9494
private final Lucene99SkipWriter skipWriter;
95-
private final GroupVIntWriter docGroupVIntWriter;
9695

9796
private boolean fieldHasNorms;
9897
private NumericDocValues norms;
@@ -173,7 +172,6 @@ public Lucene99PostingsWriter(SegmentWriteState state) throws IOException {
173172
skipWriter =
174173
new Lucene99SkipWriter(
175174
MAX_SKIP_LEVELS, BLOCK_SIZE, state.segmentInfo.maxDoc(), docOut, posOut, payOut);
176-
docGroupVIntWriter = new GroupVIntWriter();
177175
}
178176

179177
@Override
@@ -378,7 +376,7 @@ public void finishTerm(BlockTermState _state) throws IOException {
378376
docDeltaBuffer[i] = (docDeltaBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0);
379377
}
380378
}
381-
docGroupVIntWriter.writeValues(docOut, docDeltaBuffer, docBufferUpto);
379+
docOut.writeGroupVInts(docDeltaBuffer, docBufferUpto);
382380
if (writeFreqs) {
383381
for (int i = 0; i < docBufferUpto; i++) {
384382
final int freq = (int) freqBuffer[i];

lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.io.IOException;
2121
import java.nio.ByteBuffer;
2222
import java.nio.ByteOrder;
23+
import org.apache.lucene.util.GroupVIntUtil;
2324

2425
/** Base implementation class for buffered {@link IndexInput}. */
2526
public abstract class BufferedIndexInput extends IndexInput implements RandomAccessInput {
@@ -149,6 +150,16 @@ public final int readInt() throws IOException {
149150
}
150151
}
151152

153+
@Override
154+
protected void readGroupVInt(long[] dst, int offset) throws IOException {
155+
final int len =
156+
GroupVIntUtil.readGroupVInt(
157+
this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset);
158+
if (len > 0) {
159+
buffer.position(buffer.position() + len);
160+
}
161+
}
162+
152163
@Override
153164
public final long readLong() throws IOException {
154165
if (Long.BYTES <= buffer.remaining()) {

lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.Locale;
3030
import java.util.stream.Collectors;
3131
import org.apache.lucene.util.Accountable;
32+
import org.apache.lucene.util.GroupVIntUtil;
3233
import org.apache.lucene.util.RamUsageEstimator;
3334

3435
/**
@@ -212,6 +213,25 @@ public long readLong() throws IOException {
212213
}
213214
}
214215

216+
@Override
217+
protected void readGroupVInt(long[] dst, int offset) throws IOException {
218+
final ByteBuffer block = blocks[blockIndex(pos)];
219+
final int blockOffset = blockOffset(pos);
220+
// The return value MUST be saved to a local variable; `pos += readGroupVInt(...)` cannot be used,
221+
// because in Java `pos +=` first loads the current value (not the address) of pos and only
222+
// then calls the function. The function itself updates pos via readByte(), so `pos +=` would
223+
// add the return value to the stale pos value, thereby missing 1 byte.
224+
final int len =
225+
GroupVIntUtil.readGroupVInt(
226+
this,
227+
block.limit() - blockOffset,
228+
p -> block.getInt((int) p),
229+
blockOffset,
230+
dst,
231+
offset);
232+
pos += len;
233+
}
234+
215235
@Override
216236
public long length() {
217237
return length;

0 commit comments

Comments
 (0)