2121import org .apache .lucene .internal .vectorization .PostingDecodingUtil ;
2222import org .apache .lucene .store .DataInput ;
2323import org .apache .lucene .store .DataOutput ;
24- import org .apache .lucene .util .LongHeap ;
2524import org .apache .lucene .util .packed .PackedInts ;
2625
2726/** Utility class to encode sequences of 128 small positive integers. */
@@ -46,34 +45,32 @@ static boolean allEqual(int[] l) {
4645
4746 /** Encode 128 integers from {@code ints} into {@code out}. */
4847 void encode (int [] ints , DataOutput out ) throws IOException {
49- // Determine the top MAX_EXCEPTIONS + 1 values
50- final LongHeap top = new LongHeap (MAX_EXCEPTIONS + 1 );
51- for (int i = 0 ; i <= MAX_EXCEPTIONS ; ++i ) {
52- top .push (ints [i ]);
53- }
54- long topValue = top .top ();
55- for (int i = MAX_EXCEPTIONS + 1 ; i < ForUtil .BLOCK_SIZE ; ++i ) {
56- if (ints [i ] > topValue ) {
57- topValue = top .updateTop (ints [i ]);
58- }
59- }
60-
61- long max = 0L ;
62- for (int i = 1 ; i <= top .size (); ++i ) {
63- max = Math .max (max , top .get (i ));
48+ // histogram of bit widths
49+ final int [] histogram = new int [32 ];
50+ int maxBitsRequired = 0 ;
51+ for (int i = 0 ; i < ForUtil .BLOCK_SIZE ; ++i ) {
52+ final int v = ints [i ];
53+ final int bits = PackedInts .bitsRequired (v );
54+ histogram [bits ]++;
55+ maxBitsRequired = Math .max (maxBitsRequired , bits );
6456 }
6557
66- final int maxBitsRequired = PackedInts . bitsRequired ( max );
67- // We store the patch on a byte, so we can't decrease the number of bits required by more than 8
68- final int patchedBitsRequired =
69- Math . max ( PackedInts . bitsRequired ( topValue ), maxBitsRequired - 8 ) ;
58+ // We store patch on a byte, so we can't decrease bits by more than 8
59+ final int minBits = Math . max ( 0 , maxBitsRequired - 8 );
60+ int cumulativeExceptions = 0 ;
61+ int patchedBitsRequired = maxBitsRequired ;
7062 int numExceptions = 0 ;
71- final long maxUnpatchedValue = ( 1L << patchedBitsRequired ) - 1 ;
72- for (int i = 2 ; i <= top . size (); ++ i ) {
73- if (top . get ( i ) > maxUnpatchedValue ) {
74- numExceptions ++ ;
63+
64+ for (int b = maxBitsRequired ; b >= minBits ; -- b ) {
65+ if (cumulativeExceptions > MAX_EXCEPTIONS ) {
66+ break ;
7567 }
68+ patchedBitsRequired = b ;
69+ numExceptions = cumulativeExceptions ;
70+ cumulativeExceptions += histogram [b ];
7671 }
72+
73+ final int maxUnpatchedValue = (1 << patchedBitsRequired ) - 1 ;
7774 final byte [] exceptions = new byte [numExceptions * 2 ];
7875 if (numExceptions > 0 ) {
7976 int exceptionCount = 0 ;
@@ -91,7 +88,7 @@ void encode(int[] ints, DataOutput out) throws IOException {
9188 if (allEqual (ints ) && maxBitsRequired <= 8 ) {
9289 for (int i = 0 ; i < numExceptions ; ++i ) {
9390 exceptions [2 * i + 1 ] =
94- (byte ) (Byte .toUnsignedLong (exceptions [2 * i + 1 ]) << patchedBitsRequired );
91+ (byte ) (Byte .toUnsignedInt (exceptions [2 * i + 1 ]) << patchedBitsRequired );
9592 }
9693 out .writeByte ((byte ) (numExceptions << 5 ));
9794 out .writeVInt (ints [0 ]);
@@ -115,7 +112,7 @@ void decode(PostingDecodingUtil pdu, int[] ints) throws IOException {
115112 }
116113 final int numExceptions = token >>> 5 ;
117114 for (int i = 0 ; i < numExceptions ; ++i ) {
118- ints [Byte .toUnsignedInt (in .readByte ())] |= Byte .toUnsignedLong (in .readByte ()) << bitsPerValue ;
115+ ints [Byte .toUnsignedInt (in .readByte ())] |= Byte .toUnsignedInt (in .readByte ()) << bitsPerValue ;
119116 }
120117 }
121118
0 commit comments