Skip to content

Commit a1af7b9

Browse files
committed
Review comments
Skip writing the bloom filter field for all data types. Compute the bloom filter size in a simpler way.
1 parent dab897f commit a1af7b9

File tree

1 file changed

+42
-28
lines changed

1 file changed

+42
-28
lines changed

server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -74,21 +74,10 @@ public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat {
7474

7575
// We use prime numbers with the Kirsch-Mitzenmacher technique to obtain multiple hashes from two hash functions
7676
private static final int[] PRIMES = new int[] { 2, 5, 11, 17, 23, 29, 41, 47, 53, 59, 71 };
77-
private static final int[] powerOfTwoBitSetSizes;
7877
private static final int DEFAULT_NUM_HASH_FUNCTIONS = 7;
7978
private static final byte BLOOM_FILTER_STORED = 1;
8079
private static final byte BLOOM_FILTER_NOT_STORED = 0;
81-
82-
static {
83-
// Precompute powers of two (2^0 to 2^26) for efficient modulo operations using bitwise AND.
84-
// The loop below starts at i = 0, so the first entry is 2^0 (1 bit); the last entry is
// 2^26 (67,108,864 bits / 8,388,608 bytes = 8 MB)
85-
// as the maximum, staying within positive int range.
86-
powerOfTwoBitSetSizes = new int[27];
87-
for (int i = 0; i < powerOfTwoBitSetSizes.length; i++) {
88-
powerOfTwoBitSetSizes[i] = 1 << i;
89-
assert powerOfTwoBitSetSizes[i] > 0;
90-
}
91-
}
80+
private static final ByteSizeValue MAX_BLOOM_FILTER_SIZE = ByteSizeValue.ofMb(8);
9281

9382
private final BigArrays bigArrays;
9483
private final String segmentSuffix;
@@ -109,15 +98,23 @@ public ES93BloomFilterStoredFieldsFormat(
10998
this.delegate = delegate;
11099
this.bloomFilterFieldName = bloomFilterFieldName;
111100
this.numHashFunctions = DEFAULT_NUM_HASH_FUNCTIONS;
112-
int bloomFilterSizeInBits = 0;
113-
// Find the closest power of 2 that fits the required size
114-
for (int powerOfTwoBitSetSize : powerOfTwoBitSetSizes) {
115-
if (powerOfTwoBitSetSize <= (Math.multiplyExact(bloomFilterSize.getBytes(), Byte.SIZE))) {
116-
bloomFilterSizeInBits = powerOfTwoBitSetSize;
117-
}
101+
102+
if (bloomFilterSize.getBytes() <= 0) {
103+
throw new IllegalArgumentException("bloom filter size must be greater than 0");
118104
}
119-
assert bloomFilterSizeInBits > 0;
120-
this.bloomFilterSizeInBits = bloomFilterSizeInBits;
105+
106+
var closestPowerOfTwoBloomFilterSizeInBytes = Long.highestOneBit(bloomFilterSize.getBytes());
107+
if (closestPowerOfTwoBloomFilterSizeInBytes > MAX_BLOOM_FILTER_SIZE.getBytes()) {
108+
throw new IllegalArgumentException(
109+
"bloom filter size ["
110+
+ bloomFilterSize
111+
+ "] is too large; "
112+
+ "must be "
113+
+ MAX_BLOOM_FILTER_SIZE
114+
+ " or less (rounded to nearest power of two)"
115+
);
116+
}
117+
this.bloomFilterSizeInBits = (int) Math.multiplyExact(closestPowerOfTwoBloomFilterSizeInBytes, Byte.SIZE);
121118
}
122119

123120
@Override
@@ -165,7 +162,7 @@ static class Writer extends StoredFieldsWriter {
165162
String bloomFilterFieldName,
166163
StoredFieldsWriter delegateWriter
167164
) throws IOException {
168-
assert isPowerOfTwo(bloomFilterSizeInBits) : "Bloom filter size is not a power of 2";
165+
assert isPowerOfTwo(bloomFilterSizeInBits) : "Bloom filter size is not a power of 2: " + bloomFilterSizeInBits;
169166
assert numHashFunctions <= PRIMES.length
170167
: "Number of hash functions must be <= " + PRIMES.length + " but was " + numHashFunctions;
171168

@@ -223,43 +220,60 @@ public void finishDocument() throws IOException {
223220

224221
/**
 * Writes an {@code int} stored field.
 * <p>
 * In the added lines the value is forwarded to {@code delegateWriter} only when
 * {@code isBloomFilterField(info)} is false; for the bloom filter field nothing is written here.
 * The removed line forwarded every value unconditionally.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
225222
public void writeField(FieldInfo info, int value) throws IOException {
226-
delegateWriter.writeField(info, value);
223+
if (isBloomFilterField(info) == false) {
224+
delegateWriter.writeField(info, value);
225+
}
227226
}
228227

229228
/**
 * Writes a {@code long} stored field.
 * <p>
 * In the added lines the value is forwarded to {@code delegateWriter} only when
 * {@code isBloomFilterField(info)} is false; for the bloom filter field nothing is written here.
 * The removed line forwarded every value unconditionally.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
229228
public void writeField(FieldInfo info, long value) throws IOException {
231-
delegateWriter.writeField(info, value);
230+
if (isBloomFilterField(info) == false) {
231+
delegateWriter.writeField(info, value);
232+
}
232233
}
233234

234235
/**
 * Writes a {@code float} stored field.
 * <p>
 * In the added lines the value is forwarded to {@code delegateWriter} only when
 * {@code isBloomFilterField(info)} is false; for the bloom filter field nothing is written here.
 * The removed line forwarded every value unconditionally.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
235236
public void writeField(FieldInfo info, float value) throws IOException {
236-
delegateWriter.writeField(info, value);
237+
if (isBloomFilterField(info) == false) {
238+
delegateWriter.writeField(info, value);
239+
}
237240
}
238241

239242
/**
 * Writes a {@code double} stored field.
 * <p>
 * In the added lines the value is forwarded to {@code delegateWriter} only when
 * {@code isBloomFilterField(info)} is false; for the bloom filter field nothing is written here.
 * The removed line forwarded every value unconditionally.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
240243
public void writeField(FieldInfo info, double value) throws IOException {
241-
delegateWriter.writeField(info, value);
244+
if (isBloomFilterField(info) == false) {
245+
delegateWriter.writeField(info, value);
246+
}
242247
}
243248

244249
/**
 * Writes a stored field supplied as a {@code StoredFieldDataInput}.
 * <p>
 * In the added lines the value is forwarded to {@code delegateWriter} only when
 * {@code isBloomFilterField(info)} is false; for the bloom filter field nothing is written here.
 * The removed line forwarded every value unconditionally.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
245250
public void writeField(FieldInfo info, StoredFieldDataInput value) throws IOException {
246-
delegateWriter.writeField(info, value);
251+
if (isBloomFilterField(info) == false) {
252+
delegateWriter.writeField(info, value);
253+
}
247254
}
248255

249256
/**
 * Writes a {@code String} stored field.
 * <p>
 * In the added lines the value is forwarded to {@code delegateWriter} only when
 * {@code isBloomFilterField(info)} is false; for the bloom filter field nothing is written here.
 * The removed line forwarded every value unconditionally.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
250257
public void writeField(FieldInfo info, String value) throws IOException {
251-
delegateWriter.writeField(info, value);
258+
if (isBloomFilterField(info) == false) {
259+
delegateWriter.writeField(info, value);
260+
}
252261
}
253262

254263
/**
 * Writes a {@code BytesRef} stored field.
 * <p>
 * When the field is the bloom filter field the value is passed to {@code addToBloomFilter}
 * instead of the delegate writer; any other field is forwarded to {@code delegateWriter}.
 * The removed line identified the bloom filter field by name only; the added line uses
 * {@code isBloomFilterField(info)}.
 *
 * @param info  the field being written
 * @param value the field value
 * @throws IOException declared by the signature; propagated from the delegate writer
 */
@Override
255264
public void writeField(FieldInfo info, BytesRef value) throws IOException {
256-
if (info.getName().equals(bloomFilterFieldName)) {
265+
if (isBloomFilterField(info)) {
257266
addToBloomFilter(info, value);
258267
} else {
259268
delegateWriter.writeField(info, value);
260269
}
261270
}
262271

272+
/**
 * Tells whether {@code info} refers to the bloom filter field.
 * <p>
 * A field matches when {@code bloomFilterFieldInfo} is non-null and has the same field number
 * as {@code info}, or when the name of {@code info} equals {@code bloomFilterFieldName}.
 *
 * @param info the field to test
 * @return {@code true} if either check matches, {@code false} otherwise
 */
private boolean isBloomFilterField(FieldInfo info) {
273+
// Field-number comparison first; it is only attempted once bloomFilterFieldInfo is non-null.
return (bloomFilterFieldInfo != null && bloomFilterFieldInfo.getFieldNumber() == info.getFieldNumber())
274+
// Otherwise fall back to comparing the field name.
|| info.getName().equals(bloomFilterFieldName);
275+
}
276+
263277
private void addToBloomFilter(FieldInfo info, BytesRef value) {
264278
bloomFilterFieldInfo = info;
265279
var termHashes = hashTerm(value, hashes);

0 commit comments

Comments
 (0)