1919package org .elasticsearch .index .codec .vectors ;
2020
2121import org .apache .lucene .index .PointValues .IntersectVisitor ;
22- import org .apache .lucene .search .DocIdSetIterator ;
2322import org .apache .lucene .store .DataOutput ;
2423import org .apache .lucene .store .IndexInput ;
25- import org .apache .lucene .util .ArrayUtil ;
26- import org .apache .lucene .util .DocBaseBitSetIterator ;
27- import org .apache .lucene .util .FixedBitSet ;
2824import org .apache .lucene .util .IntsRef ;
2925import org .apache .lucene .util .LongsRef ;
3026import org .apache .lucene .util .hnsw .IntToIntFunction ;
3127
3228import java .io .IOException ;
33- import java .util .Arrays ;
3429
3530/**
3631 * This class is used to write and read the doc ids in a compressed format. The format is optimized
@@ -42,7 +37,6 @@ final class DocIdsWriter {
4237 public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 512 ;
4338
4439 private static final byte CONTINUOUS_IDS = (byte ) -2 ;
45- private static final byte BITSET_IDS = (byte ) -1 ;
4640 private static final byte DELTA_BPV_16 = (byte ) 16 ;
4741 private static final byte BPV_21 = (byte ) 21 ;
4842 private static final byte BPV_24 = (byte ) 24 ;
@@ -92,21 +86,11 @@ void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOEx
9286 }
9387
9488 int min2max = max - min + 1 ;
95- if (strictlySorted ) {
96- if (min2max == count ) {
97- // continuous ids, typically happens when segment is sorted
98- out .writeByte (CONTINUOUS_IDS );
99- out .writeVInt (docIds .apply (0 ));
100- return ;
101- } else if (min2max <= (count << 4 )) {
102- assert min2max > count : "min2max: " + min2max + ", count: " + count ;
103- // Only trigger bitset optimization when max - min + 1 <= 16 * count in order to avoid
104- // expanding too much storage.
105- // A field with lower cardinality will have higher probability to trigger this optimization.
106- out .writeByte (BITSET_IDS );
107- writeIdsAsBitSet (docIds , count , out );
108- return ;
109- }
89+ if (strictlySorted && min2max == count ) {
90+ // continuous ids, typically happens when segment is sorted
91+ out .writeByte (CONTINUOUS_IDS );
92+ out .writeVInt (docIds .apply (0 ));
93+ return ;
11094 }
11195
11296 if (min2max <= 0xFFFF ) {
@@ -180,38 +164,6 @@ void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOEx
180164 }
181165 }
182166
183- private static void writeIdsAsBitSet (IntToIntFunction docIds , int count , DataOutput out ) throws IOException {
184- int min = docIds .apply (0 );
185- int max = docIds .apply (count - 1 );
186-
187- final int offsetWords = min >> 6 ;
188- final int offsetBits = offsetWords << 6 ;
189- final int totalWordCount = FixedBitSet .bits2words (max - offsetBits + 1 );
190- long currentWord = 0 ;
191- int currentWordIndex = 0 ;
192-
193- out .writeVInt (offsetWords );
194- out .writeVInt (totalWordCount );
195- // build bit set streaming
196- for (int i = 0 ; i < count ; i ++) {
197- final int index = docIds .apply (i ) - offsetBits ;
198- final int nextWordIndex = index >> 6 ;
199- assert currentWordIndex <= nextWordIndex ;
200- if (currentWordIndex < nextWordIndex ) {
201- out .writeLong (currentWord );
202- currentWord = 0L ;
203- currentWordIndex ++;
204- while (currentWordIndex < nextWordIndex ) {
205- currentWordIndex ++;
206- out .writeLong (0L );
207- }
208- }
209- currentWord |= 1L << index ;
210- }
211- out .writeLong (currentWord );
212- assert currentWordIndex + 1 == totalWordCount ;
213- }
214-
215167 /** Read {@code count} integers into {@code docIDs}. */
216168 void readInts (IndexInput in , int count , int [] docIDs ) throws IOException {
217169 if (count == 0 ) {
@@ -225,9 +177,6 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
225177 case CONTINUOUS_IDS :
226178 readContinuousIds (in , count , docIDs );
227179 break ;
228- case BITSET_IDS :
229- readBitSet (in , count , docIDs );
230- break ;
231180 case DELTA_BPV_16 :
232181 readDelta16 (in , count , docIDs );
233182 break ;
@@ -245,36 +194,13 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
245194 }
246195 }
247196
248- private DocIdSetIterator readBitSetIterator (IndexInput in , int count ) throws IOException {
249- int offsetWords = in .readVInt ();
250- int longLen = in .readVInt ();
251- scratchLongs .longs = ArrayUtil .growNoCopy (scratchLongs .longs , longLen );
252- in .readLongs (scratchLongs .longs , 0 , longLen );
253- // make ghost bits clear for FixedBitSet.
254- if (longLen < scratchLongs .length ) {
255- Arrays .fill (scratchLongs .longs , longLen , scratchLongs .longs .length , 0 );
256- }
257- scratchLongs .length = longLen ;
258- FixedBitSet bitSet = new FixedBitSet (scratchLongs .longs , longLen << 6 );
259- return new DocBaseBitSetIterator (bitSet , count , offsetWords << 6 );
260- }
261-
262197 private static void readContinuousIds (IndexInput in , int count , int [] docIDs ) throws IOException {
263198 int start = in .readVInt ();
264199 for (int i = 0 ; i < count ; i ++) {
265200 docIDs [i ] = start + i ;
266201 }
267202 }
268203
269- private void readBitSet (IndexInput in , int count , int [] docIDs ) throws IOException {
270- DocIdSetIterator iterator = readBitSetIterator (in , count );
271- int docId , pos = 0 ;
272- while ((docId = iterator .nextDoc ()) != DocIdSetIterator .NO_MORE_DOCS ) {
273- docIDs [pos ++] = docId ;
274- }
275- assert pos == count : "pos: " + pos + ", count: " + count ;
276- }
277-
278204 private static void readDelta16 (IndexInput in , int count , int [] docIds ) throws IOException {
279205 final int min = in .readVInt ();
280206 final int half = count >> 1 ;
0 commit comments