Merge branch 'main' into index-version-compatibility-IT

elasticmachine · web-flow · commit ec51d4518e87 · 2025-03-28T20:25:08.000+01:00
diff --git a/docs/changelog/125881.yaml b/docs/changelog/125881.yaml
@@ -0,0 +1,5 @@
+pr: 125881
+summary: Fixes an invalid warning that was issued when restoring a system data stream from a snapshot.
+area: "Data streams"
+type: bug
+issues: []
diff --git a/docs/reference/query-languages/esql/limitations.md b/docs/reference/query-languages/esql/limitations.md
@@ -10,20 +10,54 @@ mapped_pages:
 
 ## Result set size limit [esql-max-rows]
 
-By default, an {{esql}} query returns up to 1000 rows. You can increase the number of rows up to 10,000 using the [`LIMIT`](/reference/query-languages/esql/esql-commands.md#esql-limit) command. Queries do not return more than 10,000 rows, regardless of the `LIMIT` command’s value.
+By default, an {{esql}} query returns up to 1,000 rows. You can increase the number of rows up to 10,000 using the [`LIMIT`](/reference/query-languages/esql/esql-commands.md#esql-limit) command.
 
-This limit only applies to the number of rows that are retrieved by the query. Queries and aggregations run on the full data set.
+For instance,
+```esql
+FROM index | WHERE field = "value"
+```
+is equivalent to:
+```esql
+FROM index | WHERE field = "value" | LIMIT 1000
+```
+
+Queries do not return more than 10,000 rows, regardless of the `LIMIT` command’s value. This is a configurable upper limit.
 
 To overcome this limitation:
 
 * Reduce the result set size by modifying the query to only return relevant data. Use [`WHERE`](/reference/query-languages/esql/esql-commands.md#esql-where) to select a smaller subset of the data.
 * Shift any post-query processing to the query itself. You can use the {{esql}} [`STATS`](/reference/query-languages/esql/esql-commands.md#esql-stats-by) command to aggregate data in the query.
 
+The upper limit only applies to the number of rows that are output by the query, not to the number of documents it processes: the query runs on the full data set.
+
+Consider the following two queries:
+```esql
+FROM index | WHERE field0 == "value" | LIMIT 20000
+```
+and
+```esql
+FROM index | STATS AVG(field1) BY field2 | LIMIT 20000
+```
+
+In both cases, the filtering by `field0` in the first query or the grouping by `field2` in the second is applied over all the documents present in the `index`, irrespective of their number or the index's size. However, both queries will return at most 10,000 rows, even if there were more rows available to return.
+
 The default and maximum limits can be changed using these dynamic cluster settings:
 
 * `esql.query.result_truncation_default_size`
 * `esql.query.result_truncation_max_size`
 
+However, doing so involves trade-offs. A larger result set involves higher memory pressure and increased processing times; the internode traffic within and across clusters can also increase.
+
+These limitations are similar to those enforced by the [search API for pagination](/reference/elasticsearch/rest-apis/paginate-search-results.md#paginate-search-results).
+
+| Functionality                    | Search                  | {{esql}}                                  |
+|----------------------------------|-------------------------|-------------------------------------------|
+| Results returned by default      | 10                      | 1,000                                     |
+| Default upper limit              | 10,000                  | 10,000                                    |
+| Specify number of results        | `size`                  | `LIMIT`                                   |
+| Change default number of results | n/a                     | esql.query.result_truncation_default_size |
+| Change default upper limit       | index.max_result_window | esql.query.result_truncation_max_size     |
+
 
 ## Field types [esql-supported-types]
 
diff --git a/docs/reference/query-languages/query-dsl/query-dsl-script-score-query.md b/docs/reference/query-languages/query-dsl/query-dsl-script-score-query.md
@@ -47,7 +47,7 @@ Final relevance scores from the `script_score` query cannot be negative. To supp
 
 
 `min_score`
-:   (Optional, float) Documents with a score lower than this floating point number are excluded from the search results.
+:   (Optional, float) Documents with a score lower than this floating point number are excluded from search results and results collected by aggregations.
 
 `boost`
 :   (Optional, float) Documents' scores produced by `script` are multiplied by `boost` to produce final documents' scores. Defaults to `1.0`.
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java
@@ -29,6 +29,7 @@
 import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.internal.hppc.IntObjectHashMap;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
@@ -43,33 +44,31 @@
 import org.elasticsearch.core.IOUtils;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 
 import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL;
 import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL;
 import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
 
 public class ES87TSDBDocValuesProducer extends DocValuesProducer {
-    private final Map<String, NumericEntry> numerics;
-    private final Map<String, BinaryEntry> binaries;
-    private final Map<String, SortedEntry> sorted;
-    private final Map<String, SortedSetEntry> sortedSets;
-    private final Map<String, SortedNumericEntry> sortedNumerics;
-    private final Map<String, DocValuesSkipperEntry> skippers;
+    private final IntObjectHashMap<NumericEntry> numerics;
+    private final IntObjectHashMap<BinaryEntry> binaries;
+    private final IntObjectHashMap<SortedEntry> sorted;
+    private final IntObjectHashMap<SortedSetEntry> sortedSets;
+    private final IntObjectHashMap<SortedNumericEntry> sortedNumerics;
+    private final IntObjectHashMap<DocValuesSkipperEntry> skippers;
     private final IndexInput data;
     private final int maxDoc;
     private final int version;
     private final boolean merging;
 
     ES87TSDBDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension)
         throws IOException {
-        this.numerics = new HashMap<>();
-        this.binaries = new HashMap<>();
-        this.sorted = new HashMap<>();
-        this.sortedSets = new HashMap<>();
-        this.sortedNumerics = new HashMap<>();
-        this.skippers = new HashMap<>();
+        this.numerics = new IntObjectHashMap<>();
+        this.binaries = new IntObjectHashMap<>();
+        this.sorted = new IntObjectHashMap<>();
+        this.sortedSets = new IntObjectHashMap<>();
+        this.sortedNumerics = new IntObjectHashMap<>();
+        this.skippers = new IntObjectHashMap<>();
         this.maxDoc = state.segmentInfo.maxDoc();
         this.merging = false;
 
@@ -130,12 +129,12 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
     }
 
     private ES87TSDBDocValuesProducer(
-        Map<String, NumericEntry> numerics,
-        Map<String, BinaryEntry> binaries,
-        Map<String, SortedEntry> sorted,
-        Map<String, SortedSetEntry> sortedSets,
-        Map<String, SortedNumericEntry> sortedNumerics,
-        Map<String, DocValuesSkipperEntry> skippers,
+        IntObjectHashMap<NumericEntry> numerics,
+        IntObjectHashMap<BinaryEntry> binaries,
+        IntObjectHashMap<SortedEntry> sorted,
+        IntObjectHashMap<SortedSetEntry> sortedSets,
+        IntObjectHashMap<SortedNumericEntry> sortedNumerics,
+        IntObjectHashMap<DocValuesSkipperEntry> skippers,
         IndexInput data,
         int maxDoc,
         int version,
@@ -160,13 +159,13 @@ public DocValuesProducer getMergeInstance() {
 
     @Override
     public NumericDocValues getNumeric(FieldInfo field) throws IOException {
-        NumericEntry entry = numerics.get(field.name);
+        NumericEntry entry = numerics.get(field.number);
         return getNumeric(entry, -1);
     }
 
     @Override
     public BinaryDocValues getBinary(FieldInfo field) throws IOException {
-        BinaryEntry entry = binaries.get(field.name);
+        BinaryEntry entry = binaries.get(field.number);
         if (entry.docsWithFieldOffset == -2) {
             return DocValues.emptyBinary();
         }
@@ -320,7 +319,7 @@ public boolean advanceExact(int target) throws IOException {
 
     @Override
     public SortedDocValues getSorted(FieldInfo field) throws IOException {
-        SortedEntry entry = sorted.get(field.name);
+        SortedEntry entry = sorted.get(field.number);
         return getSorted(entry);
     }
 
@@ -675,13 +674,13 @@ public int docFreq() throws IOException {
 
     @Override
     public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
-        SortedNumericEntry entry = sortedNumerics.get(field.name);
+        SortedNumericEntry entry = sortedNumerics.get(field.number);
         return getSortedNumeric(entry, -1);
     }
 
     @Override
     public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
-        SortedSetEntry entry = sortedSets.get(field.name);
+        SortedSetEntry entry = sortedSets.get(field.number);
         if (entry.singleValueEntry != null) {
             return DocValues.singleton(getSorted(entry.singleValueEntry));
         }
@@ -743,7 +742,7 @@ public long cost() {
 
     @Override
     public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
-        final DocValuesSkipperEntry entry = skippers.get(field.name);
+        final DocValuesSkipperEntry entry = skippers.get(field.number);
 
         final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
         // Prefetch the first page of data. Following pages are expected to get prefetched through
@@ -869,18 +868,18 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
             }
             byte type = meta.readByte();
             if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
-                skippers.put(info.name, readDocValueSkipperMeta(meta));
+                skippers.put(info.number, readDocValueSkipperMeta(meta));
             }
             if (type == ES87TSDBDocValuesFormat.NUMERIC) {
-                numerics.put(info.name, readNumeric(meta));
+                numerics.put(info.number, readNumeric(meta));
             } else if (type == ES87TSDBDocValuesFormat.BINARY) {
-                binaries.put(info.name, readBinary(meta));
+                binaries.put(info.number, readBinary(meta));
             } else if (type == ES87TSDBDocValuesFormat.SORTED) {
-                sorted.put(info.name, readSorted(meta));
+                sorted.put(info.number, readSorted(meta));
             } else if (type == ES87TSDBDocValuesFormat.SORTED_SET) {
-                sortedSets.put(info.name, readSortedSet(meta));
+                sortedSets.put(info.number, readSortedSet(meta));
             } else if (type == ES87TSDBDocValuesFormat.SORTED_NUMERIC) {
-                sortedNumerics.put(info.name, readSortedNumeric(meta));
+                sortedNumerics.put(info.number, readSortedNumeric(meta));
             } else {
                 throw new CorruptIndexException("invalid type: " + type, meta);
             }
diff --git a/server/src/main/java/org/elasticsearch/snapshots/RestoreService.java b/server/src/main/java/org/elasticsearch/snapshots/RestoreService.java
@@ -539,9 +539,19 @@ private void validateDataStreamTemplatesExistAndWarnIfMissing(
         Set<String> templatePatterns = streams.filter(cit -> cit.getDataStreamTemplate() != null)
             .flatMap(cit -> cit.indexPatterns().stream())
             .collect(Collectors.toSet());
+        warnIfIndexTemplateMissing(dataStreamsToRestore, templatePatterns, snapshotInfo);
+    }
 
-        for (String name : dataStreamsToRestore.keySet()) {
-            if (templatePatterns.stream().noneMatch(pattern -> Regex.simpleMatch(pattern, name))) {
+    // Visible for testing
+    static void warnIfIndexTemplateMissing(
+        Map<String, DataStream> dataStreamsToRestore,
+        Set<String> templatePatterns,
+        SnapshotInfo snapshotInfo
+    ) {
+        for (var entry : dataStreamsToRestore.entrySet()) {
+            String name = entry.getKey();
+            DataStream dataStream = entry.getValue();
+            if (dataStream.isSystem() == false && templatePatterns.stream().noneMatch(pattern -> Regex.simpleMatch(pattern, name))) {
                 String warningMessage = format(
                     "Snapshot [%s] contains data stream [%s] but custer does not have a matching index template. This will cause"
                         + " rollover to fail until a matching index template is created",
diff --git a/server/src/test/java/org/elasticsearch/snapshots/RestoreServiceTests.java b/server/src/test/java/org/elasticsearch/snapshots/RestoreServiceTests.java
@@ -36,6 +36,7 @@
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import static org.elasticsearch.core.Strings.format;
 import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.equalTo;
 import static org.mockito.ArgumentMatchers.any;
@@ -48,6 +49,50 @@
 
 public class RestoreServiceTests extends ESTestCase {
 
+    /**
+     * Test that {@link RestoreService#warnIfIndexTemplateMissing(Map, Set, SnapshotInfo)} does not warn for system
+     * data streams.
+     */
+    public void testWarnIfIndexTemplateMissingSkipsSystemDataStreams() throws Exception {
+        String dataStreamName = ".test-system-data-stream";
+        String backingIndexName = DataStream.getDefaultBackingIndexName(dataStreamName, 1);
+        List<Index> indices = List.of(new Index(backingIndexName, randomUUID()));
+
+        var dataStream = DataStream.builder(dataStreamName, indices).setSystem(true).setHidden(true).build();
+        var dataStreamsToRestore = Map.of(dataStreamName, dataStream);
+        var templatePatterns = Set.of("matches_none");
+        var snapshotInfo = createSnapshotInfo(new Snapshot("repository", new SnapshotId("name", "uuid")), Boolean.FALSE);
+
+        RestoreService.warnIfIndexTemplateMissing(dataStreamsToRestore, templatePatterns, snapshotInfo);
+
+        ensureNoWarnings();
+    }
+
+    /**
+     * Test that {@link RestoreService#warnIfIndexTemplateMissing(Map, Set, SnapshotInfo)} warns for non-system data streams.
+     */
+    public void testWarnIfIndexTemplateMissing() throws Exception {
+        String dataStreamName = ".test-system-data-stream";
+        String backingIndexName = DataStream.getDefaultBackingIndexName(dataStreamName, 1);
+        List<Index> indices = List.of(new Index(backingIndexName, randomUUID()));
+
+        var dataStream = DataStream.builder(dataStreamName, indices).build();
+        var dataStreamsToRestore = Map.of(dataStreamName, dataStream);
+        var templatePatterns = Set.of("matches_none");
+        var snapshotInfo = createSnapshotInfo(new Snapshot("repository", new SnapshotId("name", "uuid")), Boolean.FALSE);
+
+        RestoreService.warnIfIndexTemplateMissing(dataStreamsToRestore, templatePatterns, snapshotInfo);
+
+        assertWarnings(
+            format(
+                "Snapshot [%s] contains data stream [%s] but custer does not have a matching index template. This will cause"
+                    + " rollover to fail until a matching index template is created",
+                snapshotInfo.snapshotId(),
+                dataStreamName
+            )
+        );
+    }
+
     public void testUpdateDataStream() {
         long now = System.currentTimeMillis();
         String dataStreamName = "data-stream-1";