Skip to content

Commit e043687

Browse files
authored
LUCENE-9907: Move PackedInts#getReaderNoHeader() to backwards codec
1 parent b0662c8 commit e043687

File tree

15 files changed

+803
-169
lines changed

15 files changed

+803
-169
lines changed

gradle/generation/moman/gen_Packed64SingleBlock.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,7 @@
3737
*/
3838
package org.apache.lucene.util.packed;
3939
40-
import java.io.IOException;
4140
import java.util.Arrays;
42-
43-
import org.apache.lucene.store.DataInput;
4441
import org.apache.lucene.util.RamUsageEstimator;
4542
4643
/**
@@ -221,15 +218,6 @@
221218
+ ",size=" + size() + ",blocks=" + blocks.length + ")";
222219
}
223220
224-
public static Packed64SingleBlock create(DataInput in,
225-
int valueCount, int bitsPerValue) throws IOException {
226-
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
227-
for (int i = 0; i < reader.blocks.length; ++i) {
228-
reader.blocks[i] = in.readLong();
229-
}
230-
return reader;
231-
}
232-
233221
""" % (SUPPORTED_BITS_PER_VALUE[-1], ", ".join(map(str, SUPPORTED_BITS_PER_VALUE)))
234222

235223
FOOTER = "}"

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,9 @@ Other
299299

300300
* LUCENE-9705: Make new versions of all index formats for the Lucene90 codec and move
301301
the existing ones to the backwards codecs. (Julie Tibshirani, Ignacio Vera)
302+
303+
* LUCENE-9907: Remove dependency on PackedInts#getReader() from the current codecs and move the
304+
method to backwards codec. (Ignacio Vera)
302305

303306
======================= Lucene 8.9.0 =======================
304307

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/LegacyFieldsIndexReader.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import static org.apache.lucene.util.BitUtil.zigZagDecode;
2020

2121
import java.io.IOException;
22+
import org.apache.lucene.backward_codecs.packed.LegacyPackedInts;
2223
import org.apache.lucene.index.CorruptIndexException;
2324
import org.apache.lucene.index.SegmentInfo;
2425
import org.apache.lucene.store.IndexInput;
@@ -74,7 +75,7 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
7475
"Corrupted bitsPerDocBase: " + bitsPerDocBase, fieldsIndexIn);
7576
}
7677
docBasesDeltas[blockCount] =
77-
PackedInts.getReaderNoHeader(
78+
LegacyPackedInts.getReaderNoHeader(
7879
fieldsIndexIn,
7980
PackedInts.Format.PACKED,
8081
packedIntsVersion,
@@ -90,7 +91,7 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
9091
"Corrupted bitsPerStartPointer: " + bitsPerStartPointer, fieldsIndexIn);
9192
}
9293
startPointersDeltas[blockCount] =
93-
PackedInts.getReaderNoHeader(
94+
LegacyPackedInts.getReaderNoHeader(
9495
fieldsIndexIn,
9596
PackedInts.Format.PACKED,
9697
packedIntsVersion,

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.io.IOException;
2020
import java.util.Iterator;
2121
import java.util.NoSuchElementException;
22+
import org.apache.lucene.backward_codecs.packed.LegacyPackedInts;
2223
import org.apache.lucene.codecs.CodecUtil;
2324
import org.apache.lucene.codecs.TermVectorsReader;
2425
import org.apache.lucene.codecs.compressing.CompressionMode;
@@ -403,12 +404,12 @@ public Fields get(int doc) throws IOException {
403404
{
404405
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
405406
final PackedInts.Reader allFieldNumOffs =
406-
PackedInts.getReaderNoHeader(
407+
LegacyPackedInts.getReaderNoHeader(
407408
vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
408409
switch (vectorsStream.readVInt()) {
409410
case 0:
410411
final PackedInts.Reader fieldFlags =
411-
PackedInts.getReaderNoHeader(
412+
LegacyPackedInts.getReaderNoHeader(
412413
vectorsStream,
413414
PackedInts.Format.PACKED,
414415
packedIntsVersion,
@@ -425,7 +426,7 @@ public Fields get(int doc) throws IOException {
425426
break;
426427
case 1:
427428
flags =
428-
PackedInts.getReaderNoHeader(
429+
LegacyPackedInts.getReaderNoHeader(
429430
vectorsStream,
430431
PackedInts.Format.PACKED,
431432
packedIntsVersion,
@@ -446,7 +447,7 @@ public Fields get(int doc) throws IOException {
446447
{
447448
final int bitsRequired = vectorsStream.readVInt();
448449
numTerms =
449-
PackedInts.getReaderNoHeader(
450+
LegacyPackedInts.getReaderNoHeader(
450451
vectorsStream,
451452
PackedInts.Format.PACKED,
452453
packedIntsVersion,
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.backward_codecs.packed;
18+
19+
import java.io.IOException;
20+
import org.apache.lucene.store.DataInput;
21+
import org.apache.lucene.util.RamUsageEstimator;
22+
import org.apache.lucene.util.packed.PackedInts;
23+
24+
/**
25+
* Immutable version of {@code Packed64} which is constructed from an existing {@link DataInput}.
26+
*/
27+
class LegacyPacked64 extends PackedInts.Reader {
28+
static final int BLOCK_SIZE = 64; // 32 = int, 64 = long
29+
static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE
30+
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
31+
32+
/** Values are stored contiguously in the blocks array. */
33+
private final long[] blocks;
34+
/** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
35+
private final long maskRight;
36+
/** Optimization: Saves one lookup in {@link #get(int)}. */
37+
private final int bpvMinusBlockSize;
38+
/** number of values */
39+
protected final int valueCount;
40+
/** bits per value. */
41+
protected final int bitsPerValue;
42+
43+
/**
44+
* Creates an array with content retrieved from the given DataInput.
45+
*
46+
* @param in a DataInput, positioned at the start of Packed64-content.
47+
* @param valueCount the number of elements.
48+
* @param bitsPerValue the number of bits available for any given value.
49+
* @throws IOException if the values for the backing array could not be retrieved.
50+
*/
51+
public LegacyPacked64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue)
52+
throws IOException {
53+
this.valueCount = valueCount;
54+
this.bitsPerValue = bitsPerValue;
55+
final PackedInts.Format format = PackedInts.Format.PACKED;
56+
final long byteCount =
57+
format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
58+
final int longCount =
59+
format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
60+
blocks = new long[longCount];
61+
// read as many longs as we can
62+
for (int i = 0; i < byteCount / 8; ++i) {
63+
blocks[i] = in.readLong();
64+
}
65+
final int remaining = (int) (byteCount % 8);
66+
if (remaining != 0) {
67+
// read the last bytes
68+
long lastLong = 0;
69+
for (int i = 0; i < remaining; ++i) {
70+
lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
71+
}
72+
blocks[blocks.length - 1] = lastLong;
73+
}
74+
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
75+
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
76+
}
77+
78+
@Override
79+
public final int size() {
80+
return valueCount;
81+
}
82+
83+
@Override
84+
public long get(final int index) {
85+
// The abstract index in a bit stream
86+
final long majorBitPos = (long) index * bitsPerValue;
87+
// The index in the backing long-array
88+
final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
89+
// The number of value-bits in the second long
90+
final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
91+
92+
if (endBits <= 0) { // Single block
93+
return (blocks[elementPos] >>> -endBits) & maskRight;
94+
}
95+
// Two blocks
96+
return ((blocks[elementPos] << endBits) | (blocks[elementPos + 1] >>> (BLOCK_SIZE - endBits)))
97+
& maskRight;
98+
}
99+
100+
@Override
101+
public int get(int index, long[] arr, int off, int len) {
102+
assert len > 0 : "len must be > 0 (got " + len + ")";
103+
assert index >= 0 && index < valueCount;
104+
len = Math.min(len, valueCount - index);
105+
assert off + len <= arr.length;
106+
107+
final int originalIndex = index;
108+
final PackedInts.Decoder decoder =
109+
PackedInts.getDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsPerValue);
110+
111+
// go to the next block where the value does not span across two blocks
112+
final int offsetInBlocks = index % decoder.longValueCount();
113+
if (offsetInBlocks != 0) {
114+
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
115+
arr[off++] = get(index++);
116+
--len;
117+
}
118+
if (len == 0) {
119+
return index - originalIndex;
120+
}
121+
}
122+
123+
// bulk get
124+
assert index % decoder.longValueCount() == 0;
125+
int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
126+
assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
127+
final int iterations = len / decoder.longValueCount();
128+
decoder.decode(blocks, blockIndex, arr, off, iterations);
129+
final int gotValues = iterations * decoder.longValueCount();
130+
index += gotValues;
131+
len -= gotValues;
132+
assert len >= 0;
133+
134+
if (index > originalIndex) {
135+
// stay at the block boundary
136+
return index - originalIndex;
137+
} else {
138+
// no progress so far => already at a block boundary but no full block to get
139+
assert index == originalIndex;
140+
return super.get(index, arr, off, len);
141+
}
142+
}
143+
144+
@Override
145+
public String toString() {
146+
return "LegacyPacked64(bitsPerValue="
147+
+ bitsPerValue
148+
+ ",size="
149+
+ size()
150+
+ ",blocks="
151+
+ blocks.length
152+
+ ")";
153+
}
154+
155+
@Override
156+
public long ramBytesUsed() {
157+
return RamUsageEstimator.alignObjectSize(
158+
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
159+
+ 3 * Integer.BYTES // bpvMinusBlockSize,valueCount,bitsPerValue
160+
+ Long.BYTES // maskRight
161+
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
162+
+ RamUsageEstimator.sizeOf(blocks);
163+
}
164+
}

0 commit comments

Comments
 (0)