@@ -74,21 +74,10 @@ public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat {
7474
7575 // We use prime numbers with the Kirsch-Mitzenmacher technique to obtain multiple hashes from two hash functions
7676 private static final int [] PRIMES = new int [] { 2 , 5 , 11 , 17 , 23 , 29 , 41 , 47 , 53 , 59 , 71 }; // the Writer asserts numHashFunctions <= PRIMES.length
77- private static final int [] powerOfTwoBitSetSizes ;
7877 private static final int DEFAULT_NUM_HASH_FUNCTIONS = 7 ; // value the format constructor assigns to numHashFunctions
7978 private static final byte BLOOM_FILTER_STORED = 1 ; // NOTE(review): usage not visible in this hunk; presumably a marker byte meaning a filter was written - confirm
8079 private static final byte BLOOM_FILTER_NOT_STORED = 0 ; // NOTE(review): usage not visible in this hunk; presumably the counterpart marker meaning no filter was written - confirm
81-
82- static {
83- // Precompute powers of two (2^1 to 2^26) for efficient modulo operations using bitwise AND.
84- // We start from 2^1 (2 bits) and go up to 2^26 (67,108,864 bits / 8,388,608 bytes = 8 MB)
85- // as the maximum, staying within positive int range.
86- powerOfTwoBitSetSizes = new int [27 ];
87- for (int i = 0 ; i < powerOfTwoBitSetSizes .length ; i ++) {
88- powerOfTwoBitSetSizes [i ] = 1 << i ;
89- assert powerOfTwoBitSetSizes [i ] > 0 ;
90- }
91- }
80+ private static final ByteSizeValue MAX_BLOOM_FILTER_SIZE = ByteSizeValue .ofMb (8 ); // upper bound the constructor checks against the requested size after rounding it down to a power of two
9281
9382 private final BigArrays bigArrays ;
9483 private final String segmentSuffix ;
@@ -109,15 +98,23 @@ public ES93BloomFilterStoredFieldsFormat(
10998 this .delegate = delegate ;
11099 this .bloomFilterFieldName = bloomFilterFieldName ;
111100 this .numHashFunctions = DEFAULT_NUM_HASH_FUNCTIONS ;
112- int bloomFilterSizeInBits = 0 ;
113- // Find the closest power of 2 that fits the required size
114- for (int powerOfTwoBitSetSize : powerOfTwoBitSetSizes ) {
115- if (powerOfTwoBitSetSize <= (Math .multiplyExact (bloomFilterSize .getBytes (), Byte .SIZE ))) {
116- bloomFilterSizeInBits = powerOfTwoBitSetSize ;
117- }
101+
102+ if (bloomFilterSize .getBytes () <= 0 ) {
103+ throw new IllegalArgumentException ("bloom filter size must be greater than 0" );
118104 }
119- assert bloomFilterSizeInBits > 0 ;
120- this .bloomFilterSizeInBits = bloomFilterSizeInBits ;
105+
106+ var closestPowerOfTwoBloomFilterSizeInBytes = Long .highestOneBit (bloomFilterSize .getBytes ());
107+ if (closestPowerOfTwoBloomFilterSizeInBytes > MAX_BLOOM_FILTER_SIZE .getBytes ()) {
108+ throw new IllegalArgumentException (
109+ "bloom filter size ["
110+ + bloomFilterSize
111+ + "] is too large; "
112+ + "must be "
113+ + MAX_BLOOM_FILTER_SIZE
114+ + " or less (rounded to nearest power of two)"
115+ );
116+ }
117+ this .bloomFilterSizeInBits = (int ) Math .multiplyExact (closestPowerOfTwoBloomFilterSizeInBytes , Byte .SIZE );
121118 }
122119
123120 @ Override
@@ -165,7 +162,7 @@ static class Writer extends StoredFieldsWriter {
165162 String bloomFilterFieldName ,
166163 StoredFieldsWriter delegateWriter
167164 ) throws IOException {
168- assert isPowerOfTwo (bloomFilterSizeInBits ) : "Bloom filter size is not a power of 2" ;
165+ assert isPowerOfTwo (bloomFilterSizeInBits ) : "Bloom filter size is not a power of 2: " + bloomFilterSizeInBits ;
169166 assert numHashFunctions <= PRIMES .length
170167 : "Number of hash functions must be <= " + PRIMES .length + " but was " + numHashFunctions ;
171168
@@ -223,43 +220,60 @@ public void finishDocument() throws IOException {
223220
224221 @ Override
225222 public void writeField (FieldInfo info , int value ) throws IOException { // int stored-field value
226- delegateWriter .writeField (info , value );
223+ if (isBloomFilterField (info ) == false ) { // only fields other than the bloom filter field reach the delegate; NOTE(review): an int value for the bloom filter field is dropped here without being added to the filter - confirm intended
224+ delegateWriter .writeField (info , value );
225+ }
227226 }
228227
229228 @ Override
230229 public void writeField (FieldInfo info , long value ) throws IOException { // long stored-field value
231- delegateWriter .writeField (info , value );
230+ if (isBloomFilterField (info ) == false ) { // only fields other than the bloom filter field reach the delegate; NOTE(review): a long value for the bloom filter field is dropped here without being added to the filter - confirm intended
231+ delegateWriter .writeField (info , value );
232+ }
232233 }
233234
234235 @ Override
235236 public void writeField (FieldInfo info , float value ) throws IOException { // float stored-field value
236- delegateWriter .writeField (info , value );
237+ if (isBloomFilterField (info ) == false ) { // only fields other than the bloom filter field reach the delegate; NOTE(review): a float value for the bloom filter field is dropped here without being added to the filter - confirm intended
238+ delegateWriter .writeField (info , value );
239+ }
237240 }
238241
239242 @ Override
240243 public void writeField (FieldInfo info , double value ) throws IOException { // double stored-field value
241- delegateWriter .writeField (info , value );
244+ if (isBloomFilterField (info ) == false ) { // only fields other than the bloom filter field reach the delegate; NOTE(review): a double value for the bloom filter field is dropped here without being added to the filter - confirm intended
245+ delegateWriter .writeField (info , value );
246+ }
242247 }
243248
244249 @ Override
245250 public void writeField (FieldInfo info , StoredFieldDataInput value ) throws IOException { // StoredFieldDataInput stored-field value
246- delegateWriter .writeField (info , value );
251+ if (isBloomFilterField (info ) == false ) { // only fields other than the bloom filter field reach the delegate; NOTE(review): a StoredFieldDataInput value for the bloom filter field is dropped here without being added to the filter - confirm intended
252+ delegateWriter .writeField (info , value );
253+ }
247254 }
248255
249256 @ Override
250257 public void writeField (FieldInfo info , String value ) throws IOException { // String stored-field value
251- delegateWriter .writeField (info , value );
258+ if (isBloomFilterField (info ) == false ) { // only fields other than the bloom filter field reach the delegate; NOTE(review): a String value for the bloom filter field is dropped here without being added to the filter - confirm intended
259+ delegateWriter .writeField (info , value );
260+ }
252261 }
253262
254263 @ Override
255264 public void writeField (FieldInfo info , BytesRef value ) throws IOException { // BytesRef stored-field value
256- if (info . getName (). equals ( bloomFilterFieldName )) {
265+ if (isBloomFilterField ( info )) { // binary values of the bloom filter field are added to the filter and are not passed to the delegate
257266 addToBloomFilter (info , value );
258267 } else {
259268 delegateWriter .writeField (info , value ); // every other field is forwarded unchanged
260269 }
261270 }
262271
272+ private boolean isBloomFilterField (FieldInfo info ) { // true when info identifies the field that backs the bloom filter
273+ return (bloomFilterFieldInfo != null && bloomFilterFieldInfo .getFieldNumber () == info .getFieldNumber ()) // field-number match against the FieldInfo that addToBloomFilter recorded, once one exists
274+ || info .getName ().equals (bloomFilterFieldName ); // otherwise compare the field name with the configured bloom filter field name
275+ }
276+
263277 private void addToBloomFilter (FieldInfo info , BytesRef value ) {
264278 bloomFilterFieldInfo = info ;
265279 var termHashes = hashTerm (value , hashes );
0 commit comments