From dab897f4f1ff293070a3340049aea9677025a5a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Mon, 27 Oct 2025 22:06:57 +0100 Subject: [PATCH 01/10] Add ES93BloomFilterStoredFieldsFormat for efficient field existence checks Introduces a new stored fields format that builds a Bloom filter for a specific field to enable fast existence checks without storing the field itself. This delegates storage of all other fields to another StoredFieldsFormat while maintaining the ability to quickly determine if a document might contain the target field. --- .../bloomfilter/BloomFilterHashFunctions.java | 194 +++++++ .../ES87BloomFilterPostingsFormat.java | 182 +----- .../ES93BloomFilterStoredFieldsFormat.java | 539 ++++++++++++++++++ ...S93BloomFilterStoredFieldsFormatTests.java | 162 ++++++ 4 files changed, 896 insertions(+), 181 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/bloomfilter/BloomFilterHashFunctions.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/BloomFilterHashFunctions.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/BloomFilterHashFunctions.java new file mode 100644 index 0000000000000..8723b9dc5dbe3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/BloomFilterHashFunctions.java @@ -0,0 +1,194 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.bloomfilter; + +import org.elasticsearch.common.util.ByteUtils; + +public class BloomFilterHashFunctions { + private BloomFilterHashFunctions() {} + + // + // The following Murmur3 implementation is borrowed from commons-codec. + // + /** + * Implementation of the MurmurHash3 128-bit hash functions. + * + *
+     * <p>
+     * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
+     * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
+     * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
+     * </p>
+     *
+     * <p>
+     * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
+     * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
+     * </p>
+     *
+     * <p>
+     * This is public domain code with no copyrights. From home page of
+     * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
+     * </p>
+     *
+     * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
+     * code." </blockquote>
+     *
+     * <p>
+     * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
+     * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
+     * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
+     * </p>
+ * + * @see MurmurHash + * @see Original MurmurHash3 c++ + * code + * @see + * Apache Hive Murmer3 + * @since 1.13 + */ + public static final class MurmurHash3 { + /** + * A default seed to use for the murmur hash algorithm. + * Has the value {@code 104729}. + */ + public static final int DEFAULT_SEED = 104729; + + // Constants for 128-bit variant + private static final long C1 = 0x87c37b91114253d5L; + private static final long C2 = 0x4cf5ad432745937fL; + private static final int R1 = 31; + private static final int R2 = 27; + private static final int R3 = 33; + private static final int M = 5; + private static final int N1 = 0x52dce729; + private static final int N2 = 0x38495ab5; + + /** No instance methods. */ + private MurmurHash3() {} + + /** + * Generates 64-bit hash from the byte array with the given offset, length and seed by discarding the second value of the 128-bit + * hash. + * + * This version uses the default seed. + * + * @param data The input byte array + * @param offset The first element of array + * @param length The length of array + * @return The sum of the two 64-bit hashes that make up the hash128 + */ + @SuppressWarnings("fallthrough") + public static long hash64(final byte[] data, final int offset, final int length) { + long h1 = MurmurHash3.DEFAULT_SEED; + long h2 = MurmurHash3.DEFAULT_SEED; + final int nblocks = length >> 4; + + // body + for (int i = 0; i < nblocks; i++) { + final int index = offset + (i << 4); + long k1 = ByteUtils.readLongLE(data, index); + long k2 = ByteUtils.readLongLE(data, index + 8); + + // mix functions for k1 + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + h1 = Long.rotateLeft(h1, R2); + h1 += h2; + h1 = h1 * M + N1; + + // mix functions for k2 + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + h2 = Long.rotateLeft(h2, R1); + h2 += h1; + h2 = h2 * M + N2; + } + + // tail + long k1 = 0; + long k2 = 0; + final int index = offset + (nblocks << 4); + switch (offset + length - index) { + case 15: + k2 ^= ((long) data[index + 14] & 0xff) << 48; + case 14: + k2 ^= ((long) data[index + 13] & 0xff) << 40; + case 13: + k2 ^= ((long) data[index + 12] & 0xff) << 32; + case 12: + k2 ^= ((long) data[index + 11] & 0xff) << 24; + case 11: + k2 ^= ((long) data[index + 10] & 0xff) << 16; + case 10: + k2 ^= ((long) data[index + 9] & 0xff) << 8; + case 9: + k2 ^= data[index + 8] & 0xff; + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + + case 8: + k1 ^= ((long) data[index + 7] & 0xff) << 56; + case 7: + k1 ^= ((long) data[index + 6] & 0xff) << 48; + case 6: + k1 ^= ((long) data[index + 5] & 0xff) << 40; + case 5: + k1 ^= ((long) data[index + 4] & 0xff) << 32; + case 4: + k1 ^= ((long) data[index + 3] & 0xff) << 24; + case 3: + k1 ^= ((long) data[index + 2] & 0xff) << 16; + case 2: + k1 ^= ((long) data[index + 1] & 0xff) << 8; + case 1: + k1 ^= data[index] & 0xff; + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + } + + // finalization + h1 ^= length; + h2 ^= length; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + + return h1; + } + + /** + * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}. 
+ * + * @param hash The current hash + * @return The final hash + */ + private static long fmix64(long hash) { + hash ^= (hash >>> 33); + hash *= 0xff51afd7ed558ccdL; + hash ^= (hash >>> 33); + hash *= 0xc4ceb9fe1a85ec53L; + hash ^= (hash >>> 33); + return hash; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES87BloomFilterPostingsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES87BloomFilterPostingsFormat.java index abf68abe51887..65d2c1317b86e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES87BloomFilterPostingsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES87BloomFilterPostingsFormat.java @@ -47,7 +47,6 @@ import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ByteArray; -import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.core.IOUtils; import java.io.Closeable; @@ -548,7 +547,7 @@ static int numBytesForBloomFilter(int bloomFilterSize) { // Uses MurmurHash3-128 to generate a 64-bit hash value, then picks 7 subsets of 31 bits each and returns the values in the // outputs array. This provides us with 7 reasonably independent hashes of the data for the cost of one MurmurHash3 calculation. static int[] hashTerm(BytesRef br, int[] outputs) { - final long hash64 = MurmurHash3.hash64(br.bytes, br.offset, br.length); + final long hash64 = BloomFilterHashFunctions.MurmurHash3.hash64(br.bytes, br.offset, br.length); final int upperHalf = (int) (hash64 >> 32); final int lowerHalf = (int) hash64; // Derive 7 hash outputs by combining the two 64-bit halves, adding the upper half multiplied with different small constants @@ -562,183 +561,4 @@ static int[] hashTerm(BytesRef br, int[] outputs) { outputs[6] = (lowerHalf + 17 * upperHalf) & 0x7FFF_FFFF; return outputs; } - - // - // The following Murmur3 implementation is borrowed from commons-codec. - // - - /** - * Implementation of the MurmurHash3 128-bit hash functions. - * - *
-     * <p>
-     * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
-     * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
-     * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
-     * </p>
-     *
-     * <p>
-     * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
-     * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
-     * </p>
-     *
-     * <p>
-     * This is public domain code with no copyrights. From home page of
-     * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
-     * </p>
-     *
-     * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
-     * code." </blockquote>
-     *
-     * <p>
-     * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
-     * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
-     * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
-     * </p>
- * - * @see MurmurHash - * @see Original MurmurHash3 c++ - * code - * @see - * Apache Hive Murmer3 - * @since 1.13 - */ - public static final class MurmurHash3 { - /** - * A default seed to use for the murmur hash algorithm. - * Has the value {@code 104729}. - */ - public static final int DEFAULT_SEED = 104729; - - // Constants for 128-bit variant - private static final long C1 = 0x87c37b91114253d5L; - private static final long C2 = 0x4cf5ad432745937fL; - private static final int R1 = 31; - private static final int R2 = 27; - private static final int R3 = 33; - private static final int M = 5; - private static final int N1 = 0x52dce729; - private static final int N2 = 0x38495ab5; - - /** No instance methods. */ - private MurmurHash3() {} - - /** - * Generates 64-bit hash from the byte array with the given offset, length and seed by discarding the second value of the 128-bit - * hash. - * - * This version uses the default seed. - * - * @param data The input byte array - * @param offset The first element of array - * @param length The length of array - * @return The sum of the two 64-bit hashes that make up the hash128 - */ - @SuppressWarnings("fallthrough") - public static long hash64(final byte[] data, final int offset, final int length) { - long h1 = MurmurHash3.DEFAULT_SEED; - long h2 = MurmurHash3.DEFAULT_SEED; - final int nblocks = length >> 4; - - // body - for (int i = 0; i < nblocks; i++) { - final int index = offset + (i << 4); - long k1 = ByteUtils.readLongLE(data, index); - long k2 = ByteUtils.readLongLE(data, index + 8); - - // mix functions for k1 - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - h1 ^= k1; - h1 = Long.rotateLeft(h1, R2); - h1 += h2; - h1 = h1 * M + N1; - - // mix functions for k2 - k2 *= C2; - k2 = Long.rotateLeft(k2, R3); - k2 *= C1; - h2 ^= k2; - h2 = Long.rotateLeft(h2, R1); - h2 += h1; - h2 = h2 * M + N2; - } - - // tail - long k1 = 0; - long k2 = 0; - final int index = offset + (nblocks << 4); - switch (offset + length - index) { - case 15: - k2 ^= ((long) data[index + 14] & 0xff) << 48; - case 14: - k2 ^= ((long) data[index + 13] & 0xff) << 40; - case 13: - k2 ^= ((long) data[index + 12] & 0xff) << 32; - case 12: - k2 ^= ((long) data[index + 11] & 0xff) << 24; - case 11: - k2 ^= ((long) data[index + 10] & 0xff) << 16; - case 10: - k2 ^= ((long) data[index + 9] & 0xff) << 8; - case 9: - k2 ^= data[index + 8] & 0xff; - k2 *= C2; - k2 = Long.rotateLeft(k2, R3); - k2 *= C1; - h2 ^= k2; - - case 8: - k1 ^= ((long) data[index + 7] & 0xff) << 56; - case 7: - k1 ^= ((long) data[index + 6] & 0xff) << 48; - case 6: - k1 ^= ((long) data[index + 5] & 0xff) << 40; - case 5: - k1 ^= ((long) data[index + 4] & 0xff) << 32; - case 4: - k1 ^= ((long) data[index + 3] & 0xff) << 24; - case 3: - k1 ^= ((long) data[index + 2] & 0xff) << 16; - case 2: - k1 ^= ((long) data[index + 1] & 0xff) << 8; - case 1: - k1 ^= data[index] & 0xff; - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - h1 ^= k1; - } - - // finalization - h1 ^= length; - h2 ^= length; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - - return h1; - } - - /** - * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}. 
- * - * @param hash The current hash - * @return The final hash - */ - private static long fmix64(long hash) { - hash ^= (hash >>> 33); - hash *= 0xff51afd7ed558ccdL; - hash ^= (hash >>> 33); - hash *= 0xc4ceb9fe1a85ec53L; - hash ^= (hash >>> 33); - return hash; - } - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java new file mode 100644 index 0000000000000..75998d982ccb9 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -0,0 +1,539 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.bloomfilter; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.StoredFieldDataInput; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.ByteArray; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.index.codec.bloomfilter.BloomFilterHashFunctions.MurmurHash3.hash64; + +/** + * A stored fields format that builds a Bloom filter for a specific field to enable fast + * existence checks, while delegating storage of all other fields to another StoredFieldsFormat. + * + *
+ * <p>The field used to build the Bloom filter is not stored, as only its presence
+ * needs to be tracked for filtering purposes. This reduces storage overhead while maintaining
+ * the ability to quickly determine if a segment might contain the field.
+ *
+ * <h2>File formats</h2>
+ *
+ * <p>Bloom filter stored fields are represented by two files:
+ *
+ * <ol>
+ *   <li>
+ *     <p>Bloom filter data file (extension .sfbf). This file stores the bloom
+ *     filter bitset.
+ *   <li>
+ *     <p>A bloom filter meta file (extension .sfbfm). This file stores metadata about the
+ *     bloom filters stored in the bloom filter data file. The in-memory representation can
+ *     be found in {@link BloomFilterMetadata}.
+ * </ol>
+ */ +public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat { + public static final String STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME = "ES93BloomFilterStoredFieldsFormat"; + public static final String STORED_FIELDS_BLOOM_FILTER_EXTENSION = "sfbf"; + public static final String STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION = "sfbfm"; + private static final int VERSION_START = 1; + private static final int VERSION_CURRENT = VERSION_START; + + // We use prime numbers with the Kirsch-Mitzenmacher technique to obtain multiple hashes from two hash functions + private static final int[] PRIMES = new int[] { 2, 5, 11, 17, 23, 29, 41, 47, 53, 59, 71 }; + private static final int[] powerOfTwoBitSetSizes; + private static final int DEFAULT_NUM_HASH_FUNCTIONS = 7; + private static final byte BLOOM_FILTER_STORED = 1; + private static final byte BLOOM_FILTER_NOT_STORED = 0; + + static { + // Precompute powers of two (2^1 to 2^26) for efficient modulo operations using bitwise AND. + // We start from 2^1 (2 bits) and go up to 2^26 (67,108,864 bits / 8,388,608 bytes = 8 MB) + // as the maximum, staying within positive int range. + powerOfTwoBitSetSizes = new int[27]; + for (int i = 0; i < powerOfTwoBitSetSizes.length; i++) { + powerOfTwoBitSetSizes[i] = 1 << i; + assert powerOfTwoBitSetSizes[i] > 0; + } + } + + private final BigArrays bigArrays; + private final String segmentSuffix; + private final StoredFieldsFormat delegate; + private final String bloomFilterFieldName; + private final int numHashFunctions; + private final int bloomFilterSizeInBits; + + public ES93BloomFilterStoredFieldsFormat( + BigArrays bigArrays, + String segmentSuffix, + StoredFieldsFormat delegate, + ByteSizeValue bloomFilterSize, + String bloomFilterFieldName + ) { + this.bigArrays = bigArrays; + this.segmentSuffix = segmentSuffix; + this.delegate = delegate; + this.bloomFilterFieldName = bloomFilterFieldName; + this.numHashFunctions = DEFAULT_NUM_HASH_FUNCTIONS; + int bloomFilterSizeInBits = 0; + // Find the closest power of 2 that fits the required size + for (int powerOfTwoBitSetSize : powerOfTwoBitSetSizes) { + if (powerOfTwoBitSetSize <= (Math.multiplyExact(bloomFilterSize.getBytes(), Byte.SIZE))) { + bloomFilterSizeInBits = powerOfTwoBitSetSize; + } + } + assert bloomFilterSizeInBits > 0; + this.bloomFilterSizeInBits = bloomFilterSizeInBits; + } + + @Override + public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + return new Reader(directory, si, fn, context, segmentSuffix, delegate.fieldsReader(directory, si, fn, context)); + } + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { + // TODO: compute the bloom filter size based on heuristics and oversize factor + return new Writer( + directory, + si, + context, + segmentSuffix, + bigArrays, + numHashFunctions, + bloomFilterSizeInBits, + bloomFilterFieldName, + delegate.fieldsWriter(directory, si, context) + ); + } + + static class Writer extends StoredFieldsWriter { + private final IndexOutput bloomFilterDataOut; + private final IndexOutput metadataOut; + private final ByteArray buffer; + private final List toClose = new ArrayList<>(); + private final int[] hashes; + private final int numHashFunctions; + private final int bloomFilterSizeInBits; + private final int bloomFilterSizeInBytes; + private final StoredFieldsWriter delegateWriter; + private final String bloomFilterFieldName; + private FieldInfo 
bloomFilterFieldInfo; + + Writer( + Directory directory, + SegmentInfo segmentInfo, + IOContext context, + String segmentSuffix, + BigArrays bigArrays, + int numHashFunctions, + int bloomFilterSizeInBits, + String bloomFilterFieldName, + StoredFieldsWriter delegateWriter + ) throws IOException { + assert isPowerOfTwo(bloomFilterSizeInBits) : "Bloom filter size is not a power of 2"; + assert numHashFunctions <= PRIMES.length + : "Number of hash functions must be <= " + PRIMES.length + " but was " + numHashFunctions; + + this.numHashFunctions = numHashFunctions; + this.hashes = new int[numHashFunctions]; + this.bloomFilterSizeInBits = bloomFilterSizeInBits; + this.bloomFilterSizeInBytes = bloomFilterSizeInBits / Byte.SIZE; + this.bloomFilterFieldName = bloomFilterFieldName; + + this.delegateWriter = delegateWriter; + toClose.add(delegateWriter); + + boolean success = false; + try { + bloomFilterDataOut = directory.createOutput(bloomFilterFileName(segmentInfo, segmentSuffix), context); + toClose.add(bloomFilterDataOut); + CodecUtil.writeIndexHeader( + bloomFilterDataOut, + STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, + VERSION_CURRENT, + segmentInfo.getId(), + segmentSuffix + ); + + metadataOut = directory.createOutput(bloomFilterMetadataFileName(segmentInfo, segmentSuffix), context); + toClose.add(metadataOut); + CodecUtil.writeIndexHeader( + metadataOut, + STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, + VERSION_CURRENT, + segmentInfo.getId(), + segmentSuffix + ); + + buffer = bigArrays.newByteArray(bloomFilterSizeInBytes, false); + toClose.add(buffer); + + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(toClose); + } + } + } + + @Override + public void startDocument() throws IOException { + delegateWriter.startDocument(); + } + + @Override + public void finishDocument() throws IOException { + delegateWriter.finishDocument(); + } + + @Override + public void writeField(FieldInfo info, int value) throws IOException { + delegateWriter.writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, long value) throws IOException { + delegateWriter.writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, float value) throws IOException { + delegateWriter.writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, double value) throws IOException { + delegateWriter.writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, StoredFieldDataInput value) throws IOException { + delegateWriter.writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, String value) throws IOException { + delegateWriter.writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, BytesRef value) throws IOException { + if (info.getName().equals(bloomFilterFieldName)) { + addToBloomFilter(info, value); + } else { + delegateWriter.writeField(info, value); + } + } + + private void addToBloomFilter(FieldInfo info, BytesRef value) { + bloomFilterFieldInfo = info; + var termHashes = hashTerm(value, hashes); + for (int hash : termHashes) { + final int posInBitArray = hash & (bloomFilterSizeInBits - 1); + final int pos = posInBitArray >> 3; // div 8 + final int mask = 1 << (posInBitArray & 7); // mod 8 + final byte val = (byte) (buffer.get(pos) | mask); + buffer.set(pos, val); + } + } + + @Override + public void finish(int numDocs) throws IOException { + finishBloomFilterStoredFormat(); + delegateWriter.finish(numDocs); + } + + private void 
finishBloomFilterStoredFormat() throws IOException { + BloomFilterMetadata bloomFilterMetadata = null; + if (bloomFilterFieldInfo != null) { + bloomFilterMetadata = new BloomFilterMetadata( + bloomFilterFieldInfo, + bloomFilterDataOut.getFilePointer(), + bloomFilterSizeInBits, + numHashFunctions + ); + + if (buffer.hasArray()) { + bloomFilterDataOut.writeBytes(buffer.array(), 0, bloomFilterSizeInBytes); + } else { + BytesReference.fromByteArray(buffer, bloomFilterSizeInBytes).writeTo(new IndexOutputOutputStream(bloomFilterDataOut)); + } + } + CodecUtil.writeFooter(bloomFilterDataOut); + + if (bloomFilterMetadata != null) { + metadataOut.writeByte(BLOOM_FILTER_STORED); + bloomFilterMetadata.writeTo(metadataOut); + } else { + metadataOut.writeByte(BLOOM_FILTER_NOT_STORED); + } + CodecUtil.writeFooter(metadataOut); + } + + @Override + public int merge(MergeState mergeState) throws IOException { + // Skip merging the bloom filter for now + finishBloomFilterStoredFormat(); + return delegateWriter.merge(mergeState); + } + + @Override + public void close() throws IOException { + IOUtils.close(toClose); + } + + @Override + public long ramBytesUsed() { + return buffer.ramBytesUsed() + delegateWriter.ramBytesUsed(); + } + } + + public static class Reader extends StoredFieldsReader { + @Nullable + private final BloomFilterFieldReader bloomFilterFieldReader; + private final StoredFieldsReader delegateReader; + + Reader( + Directory directory, + SegmentInfo si, + FieldInfos fn, + IOContext context, + String segmentSuffix, + StoredFieldsReader delegateReader + ) throws IOException { + this.delegateReader = delegateReader; + var success = false; + try { + bloomFilterFieldReader = BloomFilterFieldReader.open(directory, si, fn, context, segmentSuffix); + success = true; + } finally { + if (success == false) { + delegateReader.close(); + } + } + } + + @Override + public StoredFieldsReader clone() { + return this; + } + + @Override + public void checkIntegrity() throws IOException { + if (bloomFilterFieldReader != null) { + bloomFilterFieldReader.checkIntegrity(); + } + delegateReader.checkIntegrity(); + } + + @Override + public void close() throws IOException { + IOUtils.close(bloomFilterFieldReader, delegateReader); + } + + @Override + public void document(int docID, StoredFieldVisitor visitor) throws IOException { + delegateReader.document(docID, visitor); + } + + @Nullable + BloomFilterFieldReader getBloomFilterFieldReader() { + return bloomFilterFieldReader; + } + } + + record BloomFilterMetadata(FieldInfo fieldInfo, long fileOffset, int sizeInBits, int numHashFunctions) { + BloomFilterMetadata { + assert fieldInfo != null; + assert isPowerOfTwo(sizeInBits); + } + + int sizeInBytes() { + return sizeInBits / Byte.SIZE; + } + + void writeTo(IndexOutput indexOut) throws IOException { + indexOut.writeVInt(fieldInfo.number); + indexOut.writeVLong(fileOffset); + indexOut.writeVInt(sizeInBits); + indexOut.writeVInt(numHashFunctions); + } + + static BloomFilterMetadata readFrom(IndexInput in, FieldInfos fieldInfos) throws IOException { + final var fieldInfo = fieldInfos.fieldInfo(in.readVInt()); + final long fileOffset = in.readVLong(); + final int bloomFilterSizeInBits = in.readVInt(); + final int numOfHashFunctions = in.readVInt(); + return new BloomFilterMetadata(fieldInfo, fileOffset, bloomFilterSizeInBits, numOfHashFunctions); + } + } + + public static class BloomFilterFieldReader implements Closeable { + private final FieldInfo fieldInfo; + private final IndexInput bloomFilterData; + private final 
RandomAccessInput bloomFilterIn; + private final int bloomFilterSizeInBits; + private final int[] hashes; + + @Nullable + public static BloomFilterFieldReader open( + Directory directory, + SegmentInfo si, + FieldInfos fn, + IOContext context, + String segmentSuffix + ) throws IOException { + List toClose = new ArrayList<>(); + boolean success = false; + try (var metaInput = directory.openChecksumInput(bloomFilterMetadataFileName(si, segmentSuffix))) { + var metadataVersion = CodecUtil.checkIndexHeader( + metaInput, + STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, + VERSION_START, + VERSION_CURRENT, + si.getId(), + segmentSuffix + ); + var hasBloomFilter = metaInput.readByte() == BLOOM_FILTER_STORED; + if (hasBloomFilter == false) { + return null; + } + BloomFilterMetadata bloomFilterMetadata = BloomFilterMetadata.readFrom(metaInput, fn); + CodecUtil.checkFooter(metaInput); + + IndexInput bloomFilterData = directory.openInput(bloomFilterFileName(si, segmentSuffix), context); + toClose.add(bloomFilterData); + var bloomFilterDataVersion = CodecUtil.checkIndexHeader( + bloomFilterData, + STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, + VERSION_START, + VERSION_CURRENT, + si.getId(), + segmentSuffix + ); + + if (metadataVersion != bloomFilterDataVersion) { + throw new CorruptIndexException( + "Format versions mismatch: meta=" + metadataVersion + ", data=" + bloomFilterDataVersion, + bloomFilterData + ); + } + CodecUtil.checksumEntireFile(bloomFilterData); + + var bloomFilterFieldReader = new BloomFilterFieldReader( + bloomFilterMetadata.fieldInfo(), + bloomFilterData.randomAccessSlice(bloomFilterMetadata.fileOffset(), bloomFilterMetadata.sizeInBytes()), + bloomFilterMetadata.sizeInBits(), + bloomFilterMetadata.numHashFunctions(), + bloomFilterData + ); + success = true; + return bloomFilterFieldReader; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(toClose); + } + } + } + + public BloomFilterFieldReader( + FieldInfo fieldInfo, + RandomAccessInput bloomFilterIn, + int bloomFilterSizeInBits, + int numHashFunctions, + IndexInput bloomFilterData + ) { + this.fieldInfo = Objects.requireNonNull(fieldInfo); + this.bloomFilterIn = bloomFilterIn; + this.bloomFilterSizeInBits = bloomFilterSizeInBits; + this.hashes = new int[numHashFunctions]; + this.bloomFilterData = bloomFilterData; + } + + public boolean mayContainTerm(String field, BytesRef term) throws IOException { + assert fieldInfo.getName().equals(field); + + var termHashes = hashTerm(term, hashes); + + for (int hash : termHashes) { + final int posInBitArray = hash & (bloomFilterSizeInBits - 1); + final int pos = posInBitArray >> 3; // div 8 + final int mask = 1 << (posInBitArray & 7); // mod 8 + final byte bits = bloomFilterIn.readByte(pos); + if ((bits & mask) == 0) { + return false; + } + } + return true; + } + + void checkIntegrity() throws IOException { + CodecUtil.checksumEntireFile(bloomFilterData); + } + + @Override + public void close() throws IOException { + bloomFilterData.close(); + } + } + + private static int[] hashTerm(BytesRef value, int[] outputs) { + long hash64 = hash64(value.bytes, value.offset, value.length); + // First use output splitting to get two hash values out of a single hash function + int upperHalf = (int) (hash64 >> Integer.SIZE); + int lowerHalf = (int) hash64; + // Then use the Kirsch-Mitzenmacher technique to obtain multiple hashes efficiently + for (int i = 0; i < outputs.length; i++) { + // Use prime numbers as the constant for the KM technique so these don't have a common gcd + outputs[i] = 
(lowerHalf + PRIMES[i] * upperHalf) & 0x7FFF_FFFF; // Clears sign bit, gives positive 31-bit values + } + return outputs; + } + + private static boolean isPowerOfTwo(int value) { + return (value & (value - 1)) == 0; + } + + private static String bloomFilterMetadataFileName(SegmentInfo segmentInfo, String segmentSuffix) { + return IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION); + } + + private static String bloomFilterFileName(SegmentInfo segmentInfo, String segmentSuffix) { + return IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, STORED_FIELDS_BLOOM_FILTER_EXTENSION); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java new file mode 100644 index 0000000000000..165b8232a2f31 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java @@ -0,0 +1,162 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.bloomfilter; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.StandardDirectoryReader; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.codecs.asserting.AssertingCodec; +import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.UUIDs; +import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.mapper.IdFieldMapper; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; + +public class ES93BloomFilterStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @Override + protected Codec getCodec() { + return new AssertingCodec() { + @Override + public StoredFieldsFormat storedFieldsFormat() { + 
return new ES93BloomFilterStoredFieldsFormat( + BigArrays.NON_RECYCLING_INSTANCE, + "", + TestUtil.getDefaultCodec().storedFieldsFormat(), + ByteSizeValue.ofKb(2), + IdFieldMapper.NAME + ); + } + }; + } + + @Override + protected void addRandomFields(Document doc) { + + } + + public void testBloomFilterFieldIsNotStoredAndBloomFilterCanBeChecked() throws IOException { + try (var directory = newDirectory()) { + Analyzer analyzer = new MockAnalyzer(random()); + IndexWriterConfig conf = newIndexWriterConfig(analyzer); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + // We want to have at most 1 segment + conf.setMaxBufferedDocs(200); + // The stored fields reader that can be accessed through a StandardDirectoryReader wraps + // the ES93BloomFilterStoredFieldsFormat.Reader. Thus, we need to open it directly from + // the segment info codec and if we use compound files we would need to obtain a compound + // file directory. For simplicity, we just use regular files. + conf.setUseCompoundFile(false); + // We don't use the RandomIndexWriter because we want to control the settings so we get + // deterministic test runs + try (IndexWriter writer = new IndexWriter(directory, conf)) { + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); + Field hostField = newField("host", "", customType); + var fieldType = new FieldType(); + fieldType.setStored(true); + fieldType.setTokenized(false); + Field idField = newField(random(), IdFieldMapper.NAME, getBytesRefFromString(""), fieldType); + + List indexedIds = new ArrayList<>(); + var docCount = atLeast(50); + for (int i = 0; i < docCount; i++) { + Document doc = new Document(); + doc.add(idField); + var id = getBytesRefFromString(UUIDs.randomBase64UUID()); + indexedIds.add(id); + idField.setBytesValue(id); + doc.add(hostField); + hostField.setStringValue("host-" + i); + writer.addDocument(doc); + } + + try (var directoryReader = StandardDirectoryReader.open(writer)) { + for (LeafReaderContext leaf : directoryReader.leaves()) { + try (ES93BloomFilterStoredFieldsFormat.Reader fieldReader = getBloomFilterReaderFrom(leaf)) { + var bloomFilterFieldReader = fieldReader.getBloomFilterFieldReader(); + // the bloom filter reader is null only if the _id field is not stored during indexing + assertThat(bloomFilterFieldReader, is(not(nullValue()))); + + for (BytesRef indexedId : indexedIds) { + assertThat(bloomFilterFieldReader.mayContainTerm(IdFieldMapper.NAME, indexedId), is(true)); + } + assertThat( + bloomFilterFieldReader.mayContainTerm(IdFieldMapper.NAME, getBytesRefFromString("random")), + is(false) + ); + + assertThat( + bloomFilterFieldReader.mayContainTerm(IdFieldMapper.NAME, getBytesRefFromString("12345")), + is(false) + ); + } + } + + var storedFields = directoryReader.storedFields(); + for (int docId = 0; docId < docCount; docId++) { + var document = storedFields.document(docId); + // The _id field is not actually stored, just used to build the bloom filter + assertThat(document.get(IdFieldMapper.NAME), nullValue()); + assertThat(document.get("host"), not(nullValue())); + } + } + } + } + } + + private static BytesRef getBytesRefFromString(String random) { + return new BytesRef(random.getBytes(StandardCharsets.UTF_8)); + } + + private ES93BloomFilterStoredFieldsFormat.Reader getBloomFilterReaderFrom(LeafReaderContext leafReaderContext) throws IOException { + LeafReader reader = leafReaderContext.reader(); + var fieldInfos = reader.getFieldInfos(); + assertThat(reader, 
is(instanceOf(SegmentReader.class))); + SegmentReader segmentReader = (SegmentReader) reader; + SegmentInfo si = segmentReader.getSegmentInfo().info; + + var storedFieldsReader = si.getCodec().storedFieldsFormat().fieldsReader(si.dir, si, fieldInfos, IOContext.DEFAULT); + assertThat(storedFieldsReader, is(instanceOf(ES93BloomFilterStoredFieldsFormat.Reader.class))); + return (ES93BloomFilterStoredFieldsFormat.Reader) storedFieldsReader; + } +} From a1af7b966a245fdceacabf1e723402564fc6ec1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Thu, 30 Oct 2025 15:00:22 +0100 Subject: [PATCH 02/10] Review comments Skip skipping the bloom filter field for all data types Compute the bloom filter size in a simpler way --- .../ES93BloomFilterStoredFieldsFormat.java | 70 +++++++++++-------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java index 75998d982ccb9..600912fdcece0 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -74,21 +74,10 @@ public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat { // We use prime numbers with the Kirsch-Mitzenmacher technique to obtain multiple hashes from two hash functions private static final int[] PRIMES = new int[] { 2, 5, 11, 17, 23, 29, 41, 47, 53, 59, 71 }; - private static final int[] powerOfTwoBitSetSizes; private static final int DEFAULT_NUM_HASH_FUNCTIONS = 7; private static final byte BLOOM_FILTER_STORED = 1; private static final byte BLOOM_FILTER_NOT_STORED = 0; - - static { - // Precompute powers of two (2^1 to 2^26) for efficient modulo operations using bitwise AND. - // We start from 2^1 (2 bits) and go up to 2^26 (67,108,864 bits / 8,388,608 bytes = 8 MB) - // as the maximum, staying within positive int range. 
- powerOfTwoBitSetSizes = new int[27]; - for (int i = 0; i < powerOfTwoBitSetSizes.length; i++) { - powerOfTwoBitSetSizes[i] = 1 << i; - assert powerOfTwoBitSetSizes[i] > 0; - } - } + private static final ByteSizeValue MAX_BLOOM_FILTER_SIZE = ByteSizeValue.ofMb(8); private final BigArrays bigArrays; private final String segmentSuffix; @@ -109,15 +98,23 @@ public ES93BloomFilterStoredFieldsFormat( this.delegate = delegate; this.bloomFilterFieldName = bloomFilterFieldName; this.numHashFunctions = DEFAULT_NUM_HASH_FUNCTIONS; - int bloomFilterSizeInBits = 0; - // Find the closest power of 2 that fits the required size - for (int powerOfTwoBitSetSize : powerOfTwoBitSetSizes) { - if (powerOfTwoBitSetSize <= (Math.multiplyExact(bloomFilterSize.getBytes(), Byte.SIZE))) { - bloomFilterSizeInBits = powerOfTwoBitSetSize; - } + + if (bloomFilterSize.getBytes() <= 0) { + throw new IllegalArgumentException("bloom filter size must be greater than 0"); } - assert bloomFilterSizeInBits > 0; - this.bloomFilterSizeInBits = bloomFilterSizeInBits; + + var closestPowerOfTwoBloomFilterSizeInBytes = Long.highestOneBit(bloomFilterSize.getBytes()); + if (closestPowerOfTwoBloomFilterSizeInBytes > MAX_BLOOM_FILTER_SIZE.getBytes()) { + throw new IllegalArgumentException( + "bloom filter size [" + + bloomFilterSize + + "] is too large; " + + "must be " + + MAX_BLOOM_FILTER_SIZE + + " or less (rounded to nearest power of two)" + ); + } + this.bloomFilterSizeInBits = (int) Math.multiplyExact(closestPowerOfTwoBloomFilterSizeInBytes, Byte.SIZE); } @Override @@ -165,7 +162,7 @@ static class Writer extends StoredFieldsWriter { String bloomFilterFieldName, StoredFieldsWriter delegateWriter ) throws IOException { - assert isPowerOfTwo(bloomFilterSizeInBits) : "Bloom filter size is not a power of 2"; + assert isPowerOfTwo(bloomFilterSizeInBits) : "Bloom filter size is not a power of 2: " + bloomFilterSizeInBits; assert numHashFunctions <= PRIMES.length : "Number of hash functions must be <= " + PRIMES.length + " but was " + numHashFunctions; @@ -223,43 +220,60 @@ public void finishDocument() throws IOException { @Override public void writeField(FieldInfo info, int value) throws IOException { - delegateWriter.writeField(info, value); + if (isBloomFilterField(info) == false) { + delegateWriter.writeField(info, value); + } } @Override public void writeField(FieldInfo info, long value) throws IOException { - delegateWriter.writeField(info, value); + if (isBloomFilterField(info) == false) { + delegateWriter.writeField(info, value); + } } @Override public void writeField(FieldInfo info, float value) throws IOException { - delegateWriter.writeField(info, value); + if (isBloomFilterField(info) == false) { + delegateWriter.writeField(info, value); + } } @Override public void writeField(FieldInfo info, double value) throws IOException { - delegateWriter.writeField(info, value); + if (isBloomFilterField(info) == false) { + delegateWriter.writeField(info, value); + } } @Override public void writeField(FieldInfo info, StoredFieldDataInput value) throws IOException { - delegateWriter.writeField(info, value); + if (isBloomFilterField(info) == false) { + delegateWriter.writeField(info, value); + } } @Override public void writeField(FieldInfo info, String value) throws IOException { - delegateWriter.writeField(info, value); + if (isBloomFilterField(info) == false) { + delegateWriter.writeField(info, value); + } } @Override public void writeField(FieldInfo info, BytesRef value) throws IOException { - if 
(info.getName().equals(bloomFilterFieldName)) { + if (isBloomFilterField(info)) { addToBloomFilter(info, value); } else { delegateWriter.writeField(info, value); } } + private boolean isBloomFilterField(FieldInfo info) { + return (bloomFilterFieldInfo != null && bloomFilterFieldInfo.getFieldNumber() == info.getFieldNumber()) + || info.getName().equals(bloomFilterFieldName); + } + private void addToBloomFilter(FieldInfo info, BytesRef value) { bloomFilterFieldInfo = info; var termHashes = hashTerm(value, hashes); From 78eafbf72b33bb715638075d96d1fbb32c8196ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Thu, 30 Oct 2025 15:00:50 +0100 Subject: [PATCH 03/10] Add more randomness --- .../bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java index 165b8232a2f31..a24ddd7a2d4cc 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java @@ -57,11 +57,12 @@ protected Codec getCodec() { return new AssertingCodec() { @Override public StoredFieldsFormat storedFieldsFormat() { + var bloomFilterSizeInKb = atLeast(2); return new ES93BloomFilterStoredFieldsFormat( BigArrays.NON_RECYCLING_INSTANCE, "", TestUtil.getDefaultCodec().storedFieldsFormat(), - ByteSizeValue.ofKb(2), + ByteSizeValue.ofKb(bloomFilterSizeInKb), IdFieldMapper.NAME ); } From d26a6e1a628b25ab772420542d1a6b1c9550d63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Thu, 30 Oct 2025 15:16:32 +0100 Subject: [PATCH 04/10] Update docs/changelog/137331.yaml --- docs/changelog/137331.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/137331.yaml diff --git a/docs/changelog/137331.yaml b/docs/changelog/137331.yaml new file mode 100644 index 0000000000000..fc267dd859adb --- /dev/null +++ b/docs/changelog/137331.yaml @@ -0,0 +1,5 @@ +pr: 137331 +summary: Add ES93BloomFilterStoredFieldsFormat for efficient field existence checks +area: TSDB +type: enhancement +issues: [] From d68c2ad3b1bfbc1243d36983c4331b8462923e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Thu, 30 Oct 2025 23:08:52 +0100 Subject: [PATCH 05/10] Improve test a bit --- ...S93BloomFilterStoredFieldsFormatTests.java | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java index a24ddd7a2d4cc..75b9af85e9995 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java @@ -14,8 +14,8 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.TextField; +import org.apache.lucene.document.LongField; +import 
org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; @@ -90,24 +90,15 @@ public void testBloomFilterFieldIsNotStoredAndBloomFilterCanBeChecked() throws I // We don't use the RandomIndexWriter because we want to control the settings so we get // deterministic test runs try (IndexWriter writer = new IndexWriter(directory, conf)) { - FieldType customType = new FieldType(TextField.TYPE_STORED); - customType.setTokenized(false); - Field hostField = newField("host", "", customType); - var fieldType = new FieldType(); - fieldType.setStored(true); - fieldType.setTokenized(false); - Field idField = newField(random(), IdFieldMapper.NAME, getBytesRefFromString(""), fieldType); - List indexedIds = new ArrayList<>(); var docCount = atLeast(50); for (int i = 0; i < docCount; i++) { Document doc = new Document(); - doc.add(idField); var id = getBytesRefFromString(UUIDs.randomBase64UUID()); indexedIds.add(id); - idField.setBytesValue(id); - doc.add(hostField); - hostField.setStringValue("host-" + i); + doc.add(new StringField(IdFieldMapper.NAME, id, Field.Store.YES)); + doc.add(new StringField("host", "host-" + i, Field.Store.YES)); + doc.add(new LongField("counter", i, Field.Store.YES)); writer.addDocument(doc); } @@ -139,6 +130,7 @@ public void testBloomFilterFieldIsNotStoredAndBloomFilterCanBeChecked() throws I // The _id field is not actually stored, just used to build the bloom filter assertThat(document.get(IdFieldMapper.NAME), nullValue()); assertThat(document.get("host"), not(nullValue())); + assertThat(document.get("counter"), not(nullValue())); } } } From 49fcc663a5162d045b8a9ba4dd150faf93a111e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Mon, 3 Nov 2025 09:51:26 +0100 Subject: [PATCH 06/10] Add more assertions --- .../bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java index 75b9af85e9995..6796d82150d15 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java @@ -40,6 +40,7 @@ import java.util.ArrayList; import java.util.List; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; @@ -130,7 +131,9 @@ public void testBloomFilterFieldIsNotStoredAndBloomFilterCanBeChecked() throws I // The _id field is not actually stored, just used to build the bloom filter assertThat(document.get(IdFieldMapper.NAME), nullValue()); assertThat(document.get("host"), not(nullValue())); + assertThat(document.get("host"), is(equalTo("host-" + docId))); assertThat(document.get("counter"), not(nullValue())); + assertThat(document.getField("counter").storedValue().getLongValue(), is(equalTo((long) docId))); } } } From 605c93de9cf0df375d6af7d37c48c66155b47740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Mon, 3 Nov 2025 09:57:05 +0100 Subject: [PATCH 07/10] Start version from 0 --- .../codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java index 600912fdcece0..b0bba971277f4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -69,7 +69,7 @@ public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat { public static final String STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME = "ES93BloomFilterStoredFieldsFormat"; public static final String STORED_FIELDS_BLOOM_FILTER_EXTENSION = "sfbf"; public static final String STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION = "sfbfm"; - private static final int VERSION_START = 1; + private static final int VERSION_START = 0; private static final int VERSION_CURRENT = VERSION_START; // We use prime numbers with the Kirsch-Mitzenmacher technique to obtain multiple hashes from two hash functions From 1e4ea4c7b7d7caaa2537e6c1ef7e53520b0b2d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Mon, 3 Nov 2025 09:58:06 +0100 Subject: [PATCH 08/10] Use toIntExact --- .../codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java index b0bba971277f4..054215d42cc10 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -114,7 +114,7 @@ public ES93BloomFilterStoredFieldsFormat( + " or less (rounded to nearest power of two)" ); } - this.bloomFilterSizeInBits = (int) Math.multiplyExact(closestPowerOfTwoBloomFilterSizeInBytes, Byte.SIZE); + this.bloomFilterSizeInBits = Math.toIntExact(Math.multiplyExact(closestPowerOfTwoBloomFilterSizeInBytes, Byte.SIZE)); } @Override From 4293207129d397cffea94e5fc1847ef1711b64c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Mon, 3 Nov 2025 09:59:09 +0100 Subject: [PATCH 09/10] Add assertion for bloom filter name --- .../codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java index 054215d42cc10..800193a3d3c38 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -275,6 +275,7 @@ private boolean isBloomFilterField(FieldInfo info) { } private void addToBloomFilter(FieldInfo info, BytesRef value) { + assert info.getName().equals(bloomFilterFieldName): "Expected " + bloomFilterFieldName + " but got " + info; bloomFilterFieldInfo = info; var termHashes = hashTerm(value, hashes); for (int hash : termHashes) { From 65506011d14729672f26f41f57b99ebbdcb4b4d5 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Mon, 3 Nov 2025 10:22:35 +0100 Subject: [PATCH 10/10] Extract interface --- .../ES93BloomFilterStoredFieldsFormat.java | 37 ++++++++++++------- ...S93BloomFilterStoredFieldsFormatTests.java | 27 ++++++-------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java index 800193a3d3c38..63719af9135c2 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -275,7 +275,7 @@ private boolean isBloomFilterField(FieldInfo info) { } private void addToBloomFilter(FieldInfo info, BytesRef value) { - assert info.getName().equals(bloomFilterFieldName): "Expected " + bloomFilterFieldName + " but got " + info; + assert info.getName().equals(bloomFilterFieldName) : "Expected " + bloomFilterFieldName + " but got " + info; bloomFilterFieldInfo = info; var termHashes = hashTerm(value, hashes); for (int hash : termHashes) { @@ -338,7 +338,7 @@ public long ramBytesUsed() { } } - public static class Reader extends StoredFieldsReader { + private static class Reader extends StoredFieldsReader implements BloomFilterProvider { @Nullable private final BloomFilterFieldReader bloomFilterFieldReader; private final StoredFieldsReader delegateReader; @@ -386,8 +386,8 @@ public void document(int docID, StoredFieldVisitor visitor) throws IOException { delegateReader.document(docID, visitor); } - @Nullable - BloomFilterFieldReader getBloomFilterFieldReader() { + @Override + public BloomFilter getBloomFilter() throws IOException { return bloomFilterFieldReader; } } @@ -418,7 +418,7 @@ static BloomFilterMetadata readFrom(IndexInput in, FieldInfos fieldInfos) throws } } - public static class BloomFilterFieldReader implements Closeable { + static class BloomFilterFieldReader implements BloomFilter { private final FieldInfo fieldInfo; private final IndexInput bloomFilterData; private final RandomAccessInput bloomFilterIn; @@ -426,13 +426,8 @@ public static class BloomFilterFieldReader implements Closeable { private final int[] hashes; @Nullable - public static BloomFilterFieldReader open( - Directory directory, - SegmentInfo si, - FieldInfos fn, - IOContext context, - String segmentSuffix - ) throws IOException { + static BloomFilterFieldReader open(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context, String segmentSuffix) + throws IOException { List toClose = new ArrayList<>(); boolean success = false; try (var metaInput = directory.openChecksumInput(bloomFilterMetadataFileName(si, segmentSuffix))) { @@ -486,7 +481,7 @@ public static BloomFilterFieldReader open( } } - public BloomFilterFieldReader( + BloomFilterFieldReader( FieldInfo fieldInfo, RandomAccessInput bloomFilterIn, int bloomFilterSizeInBits, @@ -551,4 +546,20 @@ private static String bloomFilterMetadataFileName(SegmentInfo segmentInfo, Strin private static String bloomFilterFileName(SegmentInfo segmentInfo, String segmentSuffix) { return IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, STORED_FIELDS_BLOOM_FILTER_EXTENSION); } + + public interface BloomFilter extends Closeable { + /** + * Tests whether the given term may exist in the specified field. 
+ * + * @param field the field name to check + * @param term the term to test for membership + * @return true if term may be present, false if definitely absent + */ + boolean mayContainTerm(String field, BytesRef term) throws IOException; + } + + public interface BloomFilterProvider extends Closeable { + @Nullable + BloomFilter getBloomFilter() throws IOException; + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java index 6796d82150d15..ebcf5fcec5758 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java @@ -105,23 +105,17 @@ public void testBloomFilterFieldIsNotStoredAndBloomFilterCanBeChecked() throws I try (var directoryReader = StandardDirectoryReader.open(writer)) { for (LeafReaderContext leaf : directoryReader.leaves()) { - try (ES93BloomFilterStoredFieldsFormat.Reader fieldReader = getBloomFilterReaderFrom(leaf)) { - var bloomFilterFieldReader = fieldReader.getBloomFilterFieldReader(); + try (ES93BloomFilterStoredFieldsFormat.BloomFilterProvider fieldReader = getBloomFilterProvider(leaf)) { + var bloomFilter = fieldReader.getBloomFilter(); // the bloom filter reader is null only if the _id field is not stored during indexing - assertThat(bloomFilterFieldReader, is(not(nullValue()))); + assertThat(bloomFilter, is(not(nullValue()))); for (BytesRef indexedId : indexedIds) { - assertThat(bloomFilterFieldReader.mayContainTerm(IdFieldMapper.NAME, indexedId), is(true)); + assertThat(bloomFilter.mayContainTerm(IdFieldMapper.NAME, indexedId), is(true)); } - assertThat( - bloomFilterFieldReader.mayContainTerm(IdFieldMapper.NAME, getBytesRefFromString("random")), - is(false) - ); - - assertThat( - bloomFilterFieldReader.mayContainTerm(IdFieldMapper.NAME, getBytesRefFromString("12345")), - is(false) - ); + assertThat(bloomFilter.mayContainTerm(IdFieldMapper.NAME, getBytesRefFromString("random")), is(false)); + + assertThat(bloomFilter.mayContainTerm(IdFieldMapper.NAME, getBytesRefFromString("12345")), is(false)); } } @@ -144,7 +138,8 @@ private static BytesRef getBytesRefFromString(String random) { return new BytesRef(random.getBytes(StandardCharsets.UTF_8)); } - private ES93BloomFilterStoredFieldsFormat.Reader getBloomFilterReaderFrom(LeafReaderContext leafReaderContext) throws IOException { + private ES93BloomFilterStoredFieldsFormat.BloomFilterProvider getBloomFilterProvider(LeafReaderContext leafReaderContext) + throws IOException { LeafReader reader = leafReaderContext.reader(); var fieldInfos = reader.getFieldInfos(); assertThat(reader, is(instanceOf(SegmentReader.class))); @@ -152,7 +147,7 @@ private ES93BloomFilterStoredFieldsFormat.Reader getBloomFilterReaderFrom(LeafRe SegmentInfo si = segmentReader.getSegmentInfo().info; var storedFieldsReader = si.getCodec().storedFieldsFormat().fieldsReader(si.dir, si, fieldInfos, IOContext.DEFAULT); - assertThat(storedFieldsReader, is(instanceOf(ES93BloomFilterStoredFieldsFormat.Reader.class))); - return (ES93BloomFilterStoredFieldsFormat.Reader) storedFieldsReader; + assertThat(storedFieldsReader, is(instanceOf(ES93BloomFilterStoredFieldsFormat.BloomFilterProvider.class))); + return ((ES93BloomFilterStoredFieldsFormat.BloomFilterProvider) storedFieldsReader); } }
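
The Kirsch-Mitzenmacher derivation that hashTerm implements above condenses to a few lines once pulled out of the codec plumbing. The following is a minimal, self-contained Java sketch of the same scheme, not the patch's implementation: it uses a plain long[] bitset and an FNV-1a stand-in where the real code hashes BytesRef bytes with MurmurHash3.hash64 into a BigArrays-backed ByteArray, but the probe-position arithmetic (two 32-bit halves of one 64-bit hash, prime multipliers, a sign-bit mask, and a power-of-two AND) is the same.

import java.nio.charset.StandardCharsets;

// Minimal sketch of the probing scheme used by ES93BloomFilterStoredFieldsFormat:
// k probe positions are derived from one 64-bit hash (Kirsch-Mitzenmacher), and
// the bitset size is a power of two so "hash % size" reduces to a bitwise AND.
// The long[] bitset, String input, and FNV-1a hash are simplifications.
final class BloomSketch {
    private static final int[] PRIMES = { 2, 5, 11, 17, 23, 29, 41 };

    private final long[] bits;
    private final int sizeInBits; // must be a power of two

    BloomSketch(int sizeInBits) {
        assert (sizeInBits & (sizeInBits - 1)) == 0 : "size must be a power of two";
        this.sizeInBits = sizeInBits;
        this.bits = new long[sizeInBits / Long.SIZE];
    }

    void add(String term) {
        for (int pos : positions(term)) {
            bits[pos >> 6] |= 1L << (pos & 63); // word index = pos / 64, bit = pos % 64
        }
    }

    boolean mayContain(String term) {
        for (int pos : positions(term)) {
            if ((bits[pos >> 6] & (1L << (pos & 63))) == 0) {
                return false; // a probed bit is clear: term is definitely absent
            }
        }
        return true; // all probed bits set: term is possibly present
    }

    // Kirsch-Mitzenmacher: derive k positions as lowerHalf + PRIMES[i] * upperHalf
    // from the two halves of a single 64-bit hash, then mask down to the bitset size.
    private int[] positions(String term) {
        long hash64 = fnv64(term.getBytes(StandardCharsets.UTF_8)); // stand-in hash
        int lowerHalf = (int) hash64;
        int upperHalf = (int) (hash64 >> 32);
        int[] out = new int[PRIMES.length];
        for (int i = 0; i < out.length; i++) {
            int h = (lowerHalf + PRIMES[i] * upperHalf) & 0x7FFF_FFFF; // positive 31-bit value
            out[i] = h & (sizeInBits - 1); // modulo via AND, size is a power of two
        }
        return out;
    }

    // FNV-1a, used here only so the sketch is dependency-free.
    private static long fnv64(byte[] data) {
        long h = 0xcbf29ce484222325L;
        for (byte b : data) {
            h = (h ^ (b & 0xff)) * 0x100000001b3L;
        }
        return h;
    }

    public static void main(String[] args) {
        BloomSketch filter = new BloomSketch(1 << 14); // 16 Kbit filter
        filter.add("doc-1");
        System.out.println(filter.mayContain("doc-1")); // true
        System.out.println(filter.mayContain("doc-2")); // false, with high probability
    }
}

The power-of-two constraint is what keeps each of the 7 probes a single AND plus one byte read, which is why the format rounds the configured size down via Long.highestOneBit rather than accepting arbitrary sizes.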