Skip to content

Commit b44d645

Browse files
Optimize PForUtil.encode() with histogram-based bit selection (#15165)
1 parent ce42d82 commit b44d645

File tree

4 files changed

+62
-27
lines changed

4 files changed

+62
-27
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ Optimizations
131131

132132
* GITHUB#14998: Speed up flushing of live docs. (Adrien Grand)
133133

134+
* GITHUB#15165: Optimize PForUtil.encode() with histogram-based bit selection. (Ramakrishna Chilaka)
135+
134136
* GITHUB#15151: Use `SimScorer#score` bulk API to compute impact scores per
135137
block of postings. (Adrien Grand)
136138

lucene/core/src/java/org/apache/lucene/codecs/lucene103/PForUtil.java

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
2222
import org.apache.lucene.store.DataInput;
2323
import org.apache.lucene.store.DataOutput;
24-
import org.apache.lucene.util.LongHeap;
2524
import org.apache.lucene.util.packed.PackedInts;
2625

2726
/** Utility class to encode sequences of 128 small positive integers. */
@@ -46,34 +45,32 @@ static boolean allEqual(int[] l) {
4645

4746
/** Encode 128 integers from {@code ints} into {@code out}. */
4847
void encode(int[] ints, DataOutput out) throws IOException {
49-
// Determine the top MAX_EXCEPTIONS + 1 values
50-
final LongHeap top = new LongHeap(MAX_EXCEPTIONS + 1);
51-
for (int i = 0; i <= MAX_EXCEPTIONS; ++i) {
52-
top.push(ints[i]);
53-
}
54-
long topValue = top.top();
55-
for (int i = MAX_EXCEPTIONS + 1; i < ForUtil.BLOCK_SIZE; ++i) {
56-
if (ints[i] > topValue) {
57-
topValue = top.updateTop(ints[i]);
58-
}
59-
}
60-
61-
long max = 0L;
62-
for (int i = 1; i <= top.size(); ++i) {
63-
max = Math.max(max, top.get(i));
48+
// Histogram of bit widths: histogram[b] counts the values that require exactly b bits
49+
final int[] histogram = new int[32];
50+
int maxBitsRequired = 0;
51+
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
52+
final int v = ints[i];
53+
final int bits = PackedInts.bitsRequired(v);
54+
histogram[bits]++;
55+
maxBitsRequired = Math.max(maxBitsRequired, bits);
6456
}
6557

66-
final int maxBitsRequired = PackedInts.bitsRequired(max);
67-
// We store the patch on a byte, so we can't decrease the number of bits required by more than 8
68-
final int patchedBitsRequired =
69-
Math.max(PackedInts.bitsRequired(topValue), maxBitsRequired - 8);
58+
// We store the patch on a byte, so we can't decrease the number of bits required by more than 8
59+
final int minBits = Math.max(0, maxBitsRequired - 8);
60+
int cumulativeExceptions = 0;
61+
int patchedBitsRequired = maxBitsRequired;
7062
int numExceptions = 0;
71-
final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1;
72-
for (int i = 2; i <= top.size(); ++i) {
73-
if (top.get(i) > maxUnpatchedValue) {
74-
numExceptions++;
63+
64+
for (int b = maxBitsRequired; b >= minBits; --b) {
65+
if (cumulativeExceptions > MAX_EXCEPTIONS) {
66+
break;
7567
}
68+
patchedBitsRequired = b;
69+
numExceptions = cumulativeExceptions;
70+
cumulativeExceptions += histogram[b];
7671
}
72+
73+
final int maxUnpatchedValue = (1 << patchedBitsRequired) - 1;
7774
final byte[] exceptions = new byte[numExceptions * 2];
7875
if (numExceptions > 0) {
7976
int exceptionCount = 0;
@@ -91,7 +88,7 @@ void encode(int[] ints, DataOutput out) throws IOException {
9188
if (allEqual(ints) && maxBitsRequired <= 8) {
9289
for (int i = 0; i < numExceptions; ++i) {
9390
exceptions[2 * i + 1] =
94-
(byte) (Byte.toUnsignedLong(exceptions[2 * i + 1]) << patchedBitsRequired);
91+
(byte) (Byte.toUnsignedInt(exceptions[2 * i + 1]) << patchedBitsRequired);
9592
}
9693
out.writeByte((byte) (numExceptions << 5));
9794
out.writeVInt(ints[0]);
@@ -115,7 +112,7 @@ void decode(PostingDecodingUtil pdu, int[] ints) throws IOException {
115112
}
116113
final int numExceptions = token >>> 5;
117114
for (int i = 0; i < numExceptions; ++i) {
118-
ints[Byte.toUnsignedInt(in.readByte())] |= Byte.toUnsignedLong(in.readByte()) << bitsPerValue;
115+
ints[Byte.toUnsignedInt(in.readByte())] |= Byte.toUnsignedInt(in.readByte()) << bitsPerValue;
119116
}
120117
}
121118

lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,33 @@ public static int unsignedBitsRequired(long bits) {
792792
return Math.max(1, 64 - Long.numberOfLeadingZeros(bits));
793793
}
794794

795+
/**
796+
* Returns how many bits are required to hold values up to and including maxValue. NOTE: This
797+
* method returns at least 1.
798+
*
799+
* @param maxValue the maximum int value that should be representable.
800+
* @return the amount of bits needed to represent values from 0 to maxValue.
801+
* @lucene.internal
802+
*/
803+
public static int bitsRequired(int maxValue) {
804+
if (maxValue < 0) {
805+
throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
806+
}
807+
return unsignedBitsRequired(maxValue);
808+
}
809+
810+
/**
811+
* Returns how many bits are required to store <code>bits</code>, interpreted as an unsigned
812+
* value. NOTE: This method returns at least 1.
813+
*
814+
* @param bits the int value to be stored, interpreted as unsigned.
815+
* @return the amount of bits needed to represent the unsigned value.
816+
* @lucene.internal
817+
*/
818+
public static int unsignedBitsRequired(int bits) {
819+
return Math.max(1, 32 - Integer.numberOfLeadingZeros(bits));
820+
}
821+
795822
/**
796823
* Calculates the maximum unsigned long that can be expressed with the given number of bits.
797824
*

lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,17 @@ public void testBitsRequired() {
7575
assertEquals(61, PackedInts.bitsRequired(0x1FFFFFFFFFFFFFFFL));
7676
assertEquals(62, PackedInts.bitsRequired(0x3FFFFFFFFFFFFFFFL));
7777
assertEquals(63, PackedInts.bitsRequired(0x7FFFFFFFFFFFFFFFL));
78-
assertEquals(64, PackedInts.unsignedBitsRequired(-1));
78+
assertEquals(64, PackedInts.unsignedBitsRequired(-1L));
7979
assertEquals(64, PackedInts.unsignedBitsRequired(Long.MIN_VALUE));
80+
assertEquals(1, PackedInts.bitsRequired(0L));
81+
}
82+
83+
public void testBitsRequiredInt() {
84+
assertEquals(29, PackedInts.bitsRequired((int) Math.pow(2, 29) - 1));
85+
assertEquals(30, PackedInts.bitsRequired(0x3FFFFFFF));
86+
assertEquals(31, PackedInts.bitsRequired(0x7FFFFFFF));
87+
assertEquals(32, PackedInts.unsignedBitsRequired(-1));
88+
assertEquals(32, PackedInts.unsignedBitsRequired(Integer.MIN_VALUE));
8089
assertEquals(1, PackedInts.bitsRequired(0));
8190
}
8291

0 commit comments

Comments
 (0)