elastic · ChrisHegarty · Jul 31, 2025 · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/Int7ScorerBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/Int7ScorerBenchmark.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.benchmark.vector;
+
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.core.IOUtils;
+import org.elasticsearch.simdvec.ES91Int4VectorsScorer;
+import org.elasticsearch.simdvec.ES92Int7VectorsScorer;
+import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * JMH benchmark for {@link ES92Int7VectorsScorer}.
+ * <p>
+ * Setup writes random 7-bit vectors (each followed by random correction bytes) to a file in a
+ * temporary {@link MMapDirectory}. The benchmark methods then score every stored vector against
+ * a few random queries, either one vector per call or through the bulk API using a score buffer
+ * of {@link ES92Int7VectorsScorer#BULK_SIZE} entries.
+ */
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+// first iteration is complete garbage, so make sure we really warmup
+@Warmup(iterations = 4, time = 1)
+// real iterations. not useful to spend tons of time here, better to fork more
+@Measurement(iterations = 5, time = 1)
+// engage some noise reduction
+@Fork(value = 1)
+public class Int7ScorerBenchmark {
+
+    static {
+        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
+    }
+
+    /** Number of random bytes written after each vector as stand-in corrections. */
+    private static final int CORRECTIONS_BYTES = 16;
+
+    /** Vector dimension; JMH runs the benchmark once per listed value. */
+    @Param({ "384", "782", "1024" })
+    int dims;
+
+    /** Number of vectors written to the file; a whole multiple of the scorer's bulk size. */
+    int numVectors = 20 * ES92Int7VectorsScorer.BULK_SIZE;
+    /** Number of query vectors scored per benchmark invocation. */
+    int numQueries = 5;
+
+    /** Allocated in setup with {@code dims} bytes; not read by the benchmark methods in this class. */
+    byte[] scratch;
+    /** The stored vectors, as written to the "vectors" file (without their corrections). */
+    byte[][] binaryVectors;
+    /** The random 7-bit query vectors, {@code numQueries} of them. */
+    byte[][] binaryQueries;
+    /** Output buffer handed to the bulk scoring call. */
+    float[] scores = new float[ES92Int7VectorsScorer.BULK_SIZE];
+
+    ES92Int7VectorsScorer scorer;
+    Directory dir;
+    IndexInput in;
+
+    /** Random query-side corrections passed to every scoring call. */
+    OptimizedScalarQuantizer.QuantizationResult queryCorrections;
+    /** Random value passed as the last scalar argument of every scoring call. */
+    float centroidDp;
+
+    /**
+     * Writes {@code numVectors} random 7-bit vectors to a file in a fresh temporary directory, each
+     * followed by {@value #CORRECTIONS_BYTES} random bytes standing in for its corrections. It then
+     * opens that file for reading and creates the random queries, query corrections and scorer.
+     *
+     * @throws IOException if the temporary directory or the vector file cannot be created, written or opened
+     */
+    @Setup
+    public void setup() throws IOException {
+        final ThreadLocalRandom random = ThreadLocalRandom.current();
+
+        binaryVectors = new byte[numVectors][dims];
+        final byte[] corrections = new byte[CORRECTIONS_BYTES];
+        dir = new MMapDirectory(Files.createTempDirectory("vectorData"));
+        try (IndexOutput out = dir.createOutput("vectors", IOContext.DEFAULT)) {
+            for (byte[] binaryVector : binaryVectors) {
+                fillWithRandomInt7(random, binaryVector);
+                out.writeBytes(binaryVector, 0, binaryVector.length);
+                // dedicated buffer, so the vector data kept in binaryVectors is not clobbered
+                random.nextBytes(corrections);
+                out.writeBytes(corrections, 0, corrections.length);
+            }
+        }
+
+        queryCorrections = new OptimizedScalarQuantizer.QuantizationResult(
+            random.nextFloat(),
+            random.nextFloat(),
+            random.nextFloat(),
+            Short.toUnsignedInt((short) random.nextInt())
+        );
+        centroidDp = random.nextFloat();
+
+        in = dir.openInput("vectors", IOContext.DEFAULT);
+        // Only numQueries queries are ever scored, and it is the queries (not the stored vectors)
+        // that must be populated here; otherwise every query would be an all-zero vector.
+        binaryQueries = new byte[numQueries][dims];
+        for (byte[] binaryQuery : binaryQueries) {
+            fillWithRandomInt7(random, binaryQuery);
+        }
+
+        scratch = new byte[dims];
+        scorer = ESVectorizationProvider.getInstance().newES92Int7VectorsScorer(in, dims);
+    }
+
+    /**
+     * Closes the input and then the directory it was opened from.
+     *
+     * @throws IOException if closing either resource fails
+     */
+    @TearDown
+    public void teardown() throws IOException {
+        IOUtils.close(in, dir);
+    }
+
+    /**
+     * Scores every stored vector against each query, one vector per scorer call.
+     *
+     * @param bh sink for the computed scores so the JIT cannot eliminate the work
+     * @throws IOException if reading from the input fails
+     */
+    @Benchmark
+    @Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+    public void scoreFromMemorySegment(Blackhole bh) throws IOException {
+        for (int j = 0; j < numQueries; j++) {
+            // rewind; the scorer was created over `in` and presumably reads from its current position
+            in.seek(0);
+            for (int i = 0; i < numVectors; i++) {
+                bh.consume(
+                    scorer.score(
+                        binaryQueries[j],
+                        queryCorrections.lowerInterval(),
+                        queryCorrections.upperInterval(),
+                        queryCorrections.quantizedComponentSum(),
+                        queryCorrections.additionalCorrection(),
+                        VectorSimilarityFunction.EUCLIDEAN,
+                        centroidDp
+                    )
+                );
+            }
+        }
+    }
+
+    /**
+     * Scores every stored vector against each query through the bulk API, one call per
+     * {@link ES92Int7VectorsScorer#BULK_SIZE} vectors.
+     *
+     * @param bh sink for the computed scores so the JIT cannot eliminate the work
+     * @throws IOException if reading from the input fails
+     */
+    @Benchmark
+    @Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+    public void scoreFromMemorySegmentBulk(Blackhole bh) throws IOException {
+        for (int j = 0; j < numQueries; j++) {
+            // rewind; the scorer was created over `in` and presumably reads from its current position
+            in.seek(0);
+            // step by the Int7 scorer's own bulk size, which is also what `scores` and numVectors are sized by
+            for (int i = 0; i < numVectors; i += ES92Int7VectorsScorer.BULK_SIZE) {
+                scorer.scoreBulk(
+                    binaryQueries[j],
+                    queryCorrections.lowerInterval(),
+                    queryCorrections.upperInterval(),
+                    queryCorrections.quantizedComponentSum(),
+                    queryCorrections.additionalCorrection(),
+                    VectorSimilarityFunction.EUCLIDEAN,
+                    centroidDp,
+                    scores
+                );
+                for (float score : scores) {
+                    bh.consume(score);
+                }
+            }
+        }
+    }
+
+    /** Fills {@code vector} with random values in {@code [0, 127]}, i.e. 7-bit quantized components. */
+    private static void fillWithRandomInt7(ThreadLocalRandom random, byte[] vector) {
+        for (int i = 0; i < vector.length; i++) {
+            // 7-bit quantization
+            vector[i] = (byte) random.nextInt(128);
+        }
+    }
+}
@@ -57,13 +57,9 @@ public void apply(Project project) {
             task.into("META-INF", copy -> copy.from(testBuildInfoTask));
         });
 
-        if (project.getRootProject().getName().equals("elasticsearch")) {
-            project.getTasks()
-                .withType(Test.class)
-                .matching(test -> List.of("test", "internalClusterTest").contains(test.getName()))
-                .configureEach(test -> {
-                    test.systemProperty("es.entitlement.enableForTests", "true");
-                });
-        }
+        project.getTasks()
+            .withType(Test.class)
+            .matching(test -> List.of("test", "internalClusterTest").contains(test.getName()))
+            .configureEach(test -> test.getSystemProperties().putIfAbsent("es.entitlement.enableForTests", "true"));
     }
 }
@@ -11,7 +11,7 @@ tags:
 # Build args passed to Dockerfile ARGs
 args:
   BASE_IMAGE: "redhat/ubi/ubi9"
-  BASE_TAG: "9.5"
+  BASE_TAG: "9.6"
 # Docker image labels
 labels:
   org.opencontainers.image.title: "elasticsearch"

diff --git a/docs/changelog/113949.yaml b/docs/changelog/113949.yaml
@@ -0,0 +1,7 @@
+pr: 113949
+summary: Support kNN filter on nested metadata
+area: Vector Search
+type: enhancement
+issues:
+ - 128803
+ - 106994
diff --git a/docs/changelog/129662.yaml b/docs/changelog/129662.yaml
@@ -0,0 +1,6 @@
+pr: 129662
+summary: "[Security] Add entity store and asset criticality index privileges to built\
+  \ in Editor, Viewer and Kibana System roles"
+area: Authorization
+type: enhancement
+issues: []
diff --git a/docs/changelog/131261.yaml b/docs/changelog/131261.yaml
@@ -0,0 +1,13 @@
+pr: 131261
+summary: Enable Failure Store for new logs-*-* data streams
+area: Data streams
+type: feature
+issues:
+ - 131105
+highlight:
+  title: Enable Failure Store for new logs data streams
+  body: |-
+    The [Failure Store](docs-content://manage-data/data-store/data-streams/failure-store.md) is now enabled by default for new logs data streams matching the pattern `logs-*-*`. This means that such data streams will now store invalid documents in a
+    dedicated failure index instead of rejecting them, allowing better visibility and control over data quality issues without losing data. This can be [enabled manually](docs-content://manage-data/data-store/data-streams/failure-store.md#set-up-failure-store-existing) for existing data streams.
+    Note: With the failure store enabled, the HTTP response code clients receive when indexing invalid documents will change from `400 Bad Request` to `201 Created`, with an additional response attribute `"failure_store" : "used"`.
+  notable: true
diff --git a/docs/changelog/131517.yaml b/docs/changelog/131517.yaml
@@ -0,0 +1,5 @@
+pr: 131517
+summary: Refresh potential lost connections at query start for field caps
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/131937.yaml b/docs/changelog/131937.yaml
@@ -0,0 +1,5 @@
+pr: 131937
+summary: Fix race condition in `RemoteClusterService.collectNodes()`
+area: Distributed
+type: bug
+issues: []
diff --git a/docs/changelog/132101.yaml b/docs/changelog/132101.yaml
@@ -0,0 +1,6 @@
+pr: 132101
+summary: Simulate ingest API uses existing index mapping when `mapping_addition` is
+  given
+area: Ingest Node
+type: bug
+issues: []
diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/completion.md b/docs/reference/query-languages/esql/_snippets/commands/layout/completion.md
@@ -9,10 +9,26 @@ The `COMPLETION` command allows you to send prompts and context to a Large Langu
 
 **Syntax**
 
+::::{tab-set}
+
+:::{tab-item} 9.2.0+
+
 ```esql
-COMPLETION [column =] prompt WITH inference_id
+COMPLETION [column =] prompt WITH { "inference_id" : "my_inference_endpoint" }
 ```
 
+:::
+
+:::{tab-item} 9.1.x only
+
+```esql
+COMPLETION [column =] prompt WITH my_inference_endpoint
+```
+
+:::
+
+::::
+
 **Parameters**
 
 `column`
@@ -24,7 +40,7 @@ COMPLETION [column =] prompt WITH inference_id
 :   The input text or expression used to prompt the LLM.
     This can be a string literal or a reference to a column containing text.
 
-`inference_id`
+`my_inference_endpoint`
 :   The ID of the [inference endpoint](docs-content://explore-analyze/elastic-inference/inference-api.md) to use for the task.
     The inference endpoint must be configured with the `completion` task type.
 
@@ -75,7 +91,7 @@ How you increase the timeout depends on your deployment type:
 If you don't want to increase the timeout limit, try the following:
 
 * Reduce data volume with `LIMIT` or more selective filters before the `COMPLETION` command
-* Split complex operations into multiple simpler queries 
+* Split complex operations into multiple simpler queries
 * Configure your HTTP client's response timeout (Refer to [HTTP client configuration](/reference/elasticsearch/configuration-reference/networking-settings.md#_http_client_configuration))
 
 
@@ -85,7 +101,7 @@ Use the default column name (results stored in `completion` column):
 
 ```esql
 ROW question = "What is Elasticsearch?"
-| COMPLETION question WITH test_completion_model
+| COMPLETION question WITH { "inference_id" : "my_inference_endpoint" }
 | KEEP question, completion
 ```
 
@@ -97,7 +113,7 @@ Specify the output column (results stored in `answer` column):
 
 ```esql
 ROW question = "What is Elasticsearch?"
-| COMPLETION answer = question WITH test_completion_model
+| COMPLETION answer = question WITH { "inference_id" : "my_inference_endpoint" }
 | KEEP question, answer
 ```
 
@@ -117,7 +133,7 @@ FROM movies
    "Synopsis: ", synopsis, "\n",
    "Actors: ", MV_CONCAT(actors, ", "), "\n",
   )
-| COMPLETION summary = prompt WITH test_completion_model
+| COMPLETION summary = prompt WITH { "inference_id" : "my_inference_endpoint" }
 | KEEP title, summary, rating
 ```
 

diff --git a/docs/reference/query-languages/query-dsl/query-dsl-knn-query.md b/docs/reference/query-languages/query-dsl/query-dsl-knn-query.md
@@ -203,10 +203,19 @@ POST my-image-index/_search
 `knn` query can be used inside a nested query. The behaviour here is similar to [top level nested kNN search](docs-content://solutions/search/vector/knn.md#nested-knn-search):
 
 * kNN search over nested dense_vectors diversifies the top results over the top-level document
-* `filter`  over the top-level document metadata is supported and acts as a pre-filter
-* `filter` over `nested` field metadata is not supported
+* `filter` over both the top-level document metadata and `nested` field metadata is supported and acts as a pre-filter
+
+::::{note}
+To ensure correct results, each individual filter must be over either
+the top-level metadata or the `nested` metadata. However, a single knn query
+supports multiple filters, where some filters can be over the top-level
+metadata and some over the nested metadata.
+::::
 
-A sample query can look like below:
+
+Below is a sample query with a filter over nested metadata.
+For scoring parent documents, this query only considers vectors that
+have "paragraph.language" set to "EN".
 
 ```json
 {
@@ -215,12 +224,46 @@ A sample query can look like below:
       "path" : "paragraph",
         "query" : {
           "knn": {
-            "query_vector": [
-                0.45,
-                45
-            ],
+            "query_vector": [0.45, 0.50],
             "field": "paragraph.vector",
-            "num_candidates": 2
+            "filter": {
+              "match": {
+                "paragraph.language": "EN"
+              }
+            }
+        }
+      }
+    }
+  }
+}
+```
+
+Below is a sample query with two filters: one over nested metadata
+and another over the top-level metadata. For scoring parent documents,
+this query only considers vectors whose parent's title contains the word
+"essay" and that have "paragraph.language" set to "EN".
+
+```json
+{
+  "query" : {
+    "nested" : {
+      "path" : "paragraph",
+      "query" : {
+        "knn": {
+          "query_vector": [0.45, 0.50],
+          "field": "paragraph.vector",
+          "filter": [
+            {
+              "match": {
+                "paragraph.language": "EN"
+              }
+            },
+            {
+              "match": {
+                "title": "essay"
+              }
+            }
+          ]
         }
       }
     }