Skip to content

Commit d152317

Browse files
authored
LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)
1 parent beafd11 commit d152317

File tree

3 files changed

+48
-30
lines changed

3 files changed

+48
-30
lines changed

lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,13 +278,11 @@ public FieldIndexData(
278278

279279
// records offsets into main terms dict file
280280
termsDictOffsets =
281-
MonotonicBlockPackedReader.of(
282-
clone, packedIntsVersion, blocksize, numIndexTerms, false);
281+
MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, numIndexTerms);
283282

284283
// records offsets into byte[] term data
285284
termOffsets =
286-
MonotonicBlockPackedReader.of(
287-
clone, packedIntsVersion, blocksize, 1 + numIndexTerms, false);
285+
MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, 1 + numIndexTerms);
288286
} finally {
289287
clone.close();
290288
}

lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,31 +38,34 @@ static long expected(long origin, float average, int index) {
3838
return origin + (long) (average * (long) index);
3939
}
4040

41+
private static final int BLOCK_SIZE = Byte.SIZE; // #bits in a block
42+
private static final int BLOCK_BITS = 3; // The #bits representing BLOCK_SIZE
43+
private static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
44+
4145
final int blockShift, blockMask;
4246
final long valueCount;
4347
final long[] minValues;
4448
final float[] averages;
45-
final PackedInts.Reader[] subReaders;
49+
final LongValues[] subReaders;
4650
final long sumBPV;
51+
final long totalByteCount;
4752

4853
/** Sole factory method; the constructor itself is private. */
4954
public static MonotonicBlockPackedReader of(
50-
IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct)
51-
throws IOException {
52-
return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount, direct);
55+
IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
56+
return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount);
5357
}
5458

5559
private MonotonicBlockPackedReader(
56-
IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct)
57-
throws IOException {
60+
IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
5861
this.valueCount = valueCount;
5962
blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
6063
blockMask = blockSize - 1;
6164
final int numBlocks = numBlocks(valueCount, blockSize);
6265
minValues = new long[numBlocks];
6366
averages = new float[numBlocks];
64-
subReaders = new PackedInts.Reader[numBlocks];
65-
long sumBPV = 0;
67+
subReaders = new LongValues[numBlocks];
68+
long sumBPV = 0, totalByteCount = 0;
6669
for (int i = 0; i < numBlocks; ++i) {
6770
minValues[i] = in.readZLong();
6871
averages[i] = Float.intBitsToFloat(in.readInt());
@@ -72,24 +75,44 @@ private MonotonicBlockPackedReader(
7275
throw new IOException("Corrupted");
7376
}
7477
if (bitsPerValue == 0) {
75-
subReaders[i] = new PackedInts.NullReader(blockSize);
78+
subReaders[i] = LongValues.ZEROES;
7679
} else {
7780
final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
78-
if (direct) {
79-
final long pointer = in.getFilePointer();
80-
subReaders[i] =
81-
PackedInts.getDirectReaderNoHeader(
82-
in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
83-
in.seek(
84-
pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
85-
} else {
86-
subReaders[i] =
87-
PackedInts.getReaderNoHeader(
88-
in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
89-
}
81+
final int byteCount =
82+
Math.toIntExact(
83+
PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
84+
totalByteCount += byteCount;
85+
final byte[] blocks = new byte[byteCount];
86+
in.readBytes(blocks, 0, byteCount);
87+
final long maskRight = ((1L << bitsPerValue) - 1);
88+
final int bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
89+
subReaders[i] =
90+
new LongValues() {
91+
@Override
92+
public long get(long index) {
93+
// Position, within the packed bit stream, of the first bit of the requested value
94+
final long majorBitPos = index * bitsPerValue;
95+
// The offset of the first block in the backing byte-array
96+
int blockOffset = (int) (majorBitPos >>> BLOCK_BITS);
97+
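// The number of value-bits that spill past the first byte; <= 0 means the whole value
// fits in the first byte and -endBits is the count of unused bits to its right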
98+
long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
99+
if (endBits <= 0) {
100+
// Single block
101+
return ((blocks[blockOffset] & 0xFFL) >>> -endBits) & maskRight;
102+
}
103+
// Multiple blocks
104+
long value = ((blocks[blockOffset++] & 0xFFL) << endBits) & maskRight;
105+
while (endBits > BLOCK_SIZE) {
106+
endBits -= BLOCK_SIZE;
107+
value |= (blocks[blockOffset++] & 0xFFL) << endBits;
108+
}
109+
return value | ((blocks[blockOffset] & 0xFFL) >>> (BLOCK_SIZE - endBits));
110+
}
111+
};
90112
}
91113
}
92114
this.sumBPV = sumBPV;
115+
this.totalByteCount = totalByteCount;
93116
}
94117

95118
@Override
@@ -110,9 +133,7 @@ public long ramBytesUsed() {
110133
long sizeInBytes = 0;
111134
sizeInBytes += RamUsageEstimator.sizeOf(minValues);
112135
sizeInBytes += RamUsageEstimator.sizeOf(averages);
113-
for (PackedInts.Reader reader : subReaders) {
114-
sizeInBytes += reader.ramBytesUsed();
115-
}
136+
sizeInBytes += totalByteCount;
116137
return sizeInBytes;
117138
}
118139

lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,8 +1371,7 @@ public void testMonotonicBlockPackedReaderWriter() throws IOException {
13711371

13721372
final IndexInput in = dir.openInput("out.bin", IOContext.DEFAULT);
13731373
final MonotonicBlockPackedReader reader =
1374-
MonotonicBlockPackedReader.of(
1375-
in, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
1374+
MonotonicBlockPackedReader.of(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
13761375
assertEquals(fp, in.getFilePointer());
13771376
for (int i = 0; i < valueCount; ++i) {
13781377
assertEquals("i=" + i, values[i], reader.get(i));

0 commit comments

Comments
 (0)