
Commit 514e98e

[DiskBBQ] Clean up of DocIdsWriter (#134539)

1 parent: 0c242ae
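Summary (inferred from the diff below): this change removes DocIdsWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE and the decode branches that specialized on it, a holdover from the BKD implementation this class was copied from, along with the unused IntsRef scratch machinery. All block sizes now take the generic decode16/decode21/decode24 paths, and the tests no longer special-case that count.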

File tree

2 files changed: +5 -53 lines


server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriter.java

Lines changed: 3 additions & 49 deletions
@@ -18,10 +18,8 @@
  */
 package org.elasticsearch.index.codec.vectors.diskbbq;
 
-import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.hnsw.IntToIntFunction;
 
 import java.io.IOException;
@@ -33,7 +31,6 @@
  * <p>It is copied from the BKD implementation.
  */
 final class DocIdsWriter {
-    public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 512;
 
     private static final byte CONTINUOUS_IDS = (byte) -2;
     private static final byte DELTA_BPV_16 = (byte) 16;
@@ -43,22 +40,6 @@ final class DocIdsWriter {
 
     private int[] scratch = new int[0];
 
-    /**
-     * IntsRef to be used to iterate over the scratch buffer. A single instance is reused to avoid
-     * re-allocating the object. The ints and length fields need to be reset each use.
-     *
-     * <p>The main reason for existing is to be able to call the {@link
-     * IntersectVisitor#visit(IntsRef)} method rather than the {@link IntersectVisitor#visit(int)}
-     * method. This seems to make a difference in performance, probably due to fewer virtual calls
-     * then happening (once per read call rather than once per doc).
-     */
-    private final IntsRef scratchIntsRef = new IntsRef();
-
-    {
-        // This is here to not rely on the default constructor of IntsRef to set offset to 0
-        scratchIntsRef.offset = 0;
-    }
-
     DocIdsWriter() {}
 
     /**
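The scratchIntsRef deleted above existed only to feed Lucene's bulk IntersectVisitor#visit(IntsRef) entry point, which this DiskBBQ copy never calls. For context, a minimal sketch of the two call shapes in the Lucene API; the helper below is hypothetical and not part of this commit:

    import java.io.IOException;

    import org.apache.lucene.index.PointValues.IntersectVisitor;
    import org.apache.lucene.util.IntsRef;

    class VisitShapes {
        // Hypothetical helper contrasting the two IntersectVisitor entry points.
        static void visitAll(IntersectVisitor visitor, int[] docs, int count) throws IOException {
            // Bulk path: one virtual call per buffer; this is what scratchIntsRef fed.
            visitor.visit(new IntsRef(docs, 0, count));
            // Per-doc path: one virtual call per document.
            // for (int i = 0; i < count; i++) { visitor.visit(docs[i]); }
        }
    }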
@@ -334,13 +315,7 @@ private static void readDelta16(IndexInput in, int count, int[] docIds) throws IOException {
         final int min = in.readVInt();
         final int half = count >> 1;
         in.readInts(docIds, 0, half);
-        if (count == DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
-            // Same format, but enabling the JVM to specialize the decoding logic for the default number
-            // of points per node proved to help on benchmarks
-            decode16(docIds, DEFAULT_MAX_POINTS_IN_LEAF_NODE / 2, min);
-        } else {
-            decode16(docIds, half, min);
-        }
+        decode16(docIds, half, min);
         // read the remaining doc if count is odd.
         for (int i = half << 1; i < count; i++) {
             docIds[i] = Short.toUnsignedInt(in.readShort()) + min;
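The removed branch only re-ran decode16 with a compile-time constant length so the JIT could specialize the loop. decode16 itself is outside this diff; the sketch below is inferred from the read path above (each stored int packs two 16-bit deltas against min), not the verbatim implementation:

    // Inferred sketch of decode16: expand `half` packed ints into `2 * half`
    // doc IDs in place. Iterating from the back lets the output overwrite the
    // packed input without clobbering values that are still needed.
    private static void decode16(int[] docIds, int half, int min) {
        for (int i = half - 1; i >= 0; --i) {
            final int packed = docIds[i];
            docIds[i << 1] = (packed >>> 16) + min; // high 16 bits: even slot
            docIds[(i << 1) + 1] = (packed & 0xFFFF) + min; // low 16 bits: odd slot
        }
    }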
@@ -364,18 +339,7 @@ private void readInts21(IndexInput in, int count, int[] docIDs) throws IOException {
         int oneThird = floorToMultipleOf16(count / 3);
         int numInts = oneThird << 1;
         in.readInts(scratch, 0, numInts);
-        if (count == DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
-            // Same format, but enabling the JVM to specialize the decoding logic for the default number
-            // of points per node proved to help on benchmarks
-            decode21(
-                docIDs,
-                scratch,
-                floorToMultipleOf16(DEFAULT_MAX_POINTS_IN_LEAF_NODE / 3),
-                floorToMultipleOf16(DEFAULT_MAX_POINTS_IN_LEAF_NODE / 3) * 2
-            );
-        } else {
-            decode21(docIDs, scratch, oneThird, numInts);
-        }
+        decode21(docIDs, scratch, oneThird, numInts);
         int i = oneThird * 3;
         for (; i < count - 2; i += 3) {
             long l = in.readLong();
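The hunk's trailing context cuts off inside the tail loop. For 21 bits per value, three doc IDs fit in the low 63 bits of one long; a plausible continuation of the loop body, inferred from that format rather than copied from this file:

            // Inferred split of one long into three 21-bit doc IDs.
            docIDs[i] = (int) (l >>> 42); // bits 62..42
            docIDs[i + 1] = (int) (l >>> 21) & 0x1FFFFF; // bits 41..21
            docIDs[i + 2] = (int) l & 0x1FFFFF; // bits 20..0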
@@ -401,17 +365,7 @@ private void readInts24(IndexInput in, int count, int[] docIDs) throws IOException {
         int quarter = count >> 2;
         int numInts = quarter * 3;
         in.readInts(scratch, 0, numInts);
-        if (count == DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
-            // Same format, but enabling the JVM to specialize the decoding logic for the default number
-            // of points per node proved to help on benchmarks
-            assert floorToMultipleOf16(quarter) == quarter
-                : "We are relying on the fact that quarter of DEFAULT_MAX_POINTS_IN_LEAF_NODE"
-                    + " is a multiple of 16 to vectorize the decoding loop,"
-                    + " please check performance issue if you want to break this assumption.";
-            decode24(docIDs, scratch, DEFAULT_MAX_POINTS_IN_LEAF_NODE / 4, DEFAULT_MAX_POINTS_IN_LEAF_NODE / 4 * 3);
-        } else {
-            decode24(docIDs, scratch, quarter, numInts);
-        }
+        decode24(docIDs, scratch, quarter, numInts);
         // Now read the remaining 0, 1, 2 or 3 values
         for (int i = quarter << 2; i < count; ++i) {
             docIDs[i] = (in.readShort() & 0xFFFF) | (in.readByte() & 0xFF) << 16;
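As with the other widths, the removed branch merely pinned the loop bounds to constants (plus an assert that a quarter of 512 is a multiple of 16). decode24's body is outside this diff; a sketch inferred from the read path, where every 3 scratch ints (96 bits) carry four 24-bit doc IDs:

    // Inferred sketch of decode24: the first `numInts` outputs take the top
    // 24 bits of each scratch int; the remaining `quarter` outputs are stitched
    // together from the three leftover low bytes of each triplet.
    private static void decode24(int[] docIDs, int[] scratch, int quarter, int numInts) {
        for (int i = 0; i < numInts; ++i) {
            docIDs[i] = scratch[i] >>> 8;
        }
        for (int i = 0; i < quarter; i++) {
            docIDs[i + numInts] = ((scratch[i * 3] & 0xFF) << 16)
                | ((scratch[i * 3 + 1] & 0xFF) << 8)
                | (scratch[i * 3 + 2] & 0xFF);
        }
    }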

server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriterTests.java

Lines changed: 2 additions & 4 deletions
@@ -36,8 +36,6 @@
 import java.util.List;
 import java.util.Set;
 
-import static org.elasticsearch.index.codec.vectors.diskbbq.DocIdsWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
-
 public class DocIdsWriterTests extends LuceneTestCase {
 
     public void testNoDocs() throws Exception {
@@ -50,7 +48,7 @@ public void testRandom() throws Exception {
         int numIters = atLeast(100);
         try (Directory dir = newDirectory()) {
             for (int iter = 0; iter < numIters; ++iter) {
-                int count = random().nextBoolean() ? 1 + random().nextInt(5000) : DEFAULT_MAX_POINTS_IN_LEAF_NODE;
+                int count = 1 + random().nextInt(5000);
                 int[] docIDs = new int[count];
                 final int bpv = TestUtil.nextInt(random(), 1, 32);
                 for (int i = 0; i < docIDs.length; ++i) {
@@ -80,7 +78,7 @@ public void testCluster() throws Exception {
         int numIters = atLeast(100);
         try (Directory dir = newDirectory()) {
             for (int iter = 0; iter < numIters; ++iter) {
-                int count = random().nextBoolean() ? 1 + random().nextInt(5000) : DEFAULT_MAX_POINTS_IN_LEAF_NODE;
+                int count = 1 + random().nextInt(5000);
                 int[] docIDs = new int[count];
                 int min = random().nextInt(1000);
                 final int bpv = TestUtil.nextInt(random(), 1, 16);
