Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2faea54
Add ability to set "max_analyzed_offset" implicitly to "index.highlight
svilen-mihaylov-elastic Dec 17, 2024
f26f2c7
Add test
svilen-mihaylov-elastic Jan 2, 2025
b18a696
Tweak tests
svilen-mihaylov-elastic Jan 2, 2025
a6a4623
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 2, 2025
f139e85
Fix test and update docs
svilen-mihaylov-elastic Jan 2, 2025
1285b01
Remove logger info
svilen-mihaylov-elastic Jan 2, 2025
eb6725e
Added test for zero
svilen-mihaylov-elastic Jan 2, 2025
4ae0be1
Add a capability
svilen-mihaylov-elastic Jan 3, 2025
c0c2127
Add test for -1
svilen-mihaylov-elastic Jan 3, 2025
c6d2920
Exclude test from REST compatibility
svilen-mihaylov-elastic Jan 3, 2025
703f438
Add Highlight_ to search capability name
svilen-mihaylov-elastic Jan 3, 2025
bf90d9a
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 6, 2025
9fc0fcb
Disallow 0
svilen-mihaylov-elastic Jan 6, 2025
6fd85f3
Fix test
svilen-mihaylov-elastic Jan 6, 2025
5abd2c4
Should SUCCEED -> Should FAIL
svilen-mihaylov-elastic Jan 6, 2025
0867480
Oops wrong test
svilen-mihaylov-elastic Jan 6, 2025
f889a61
Rename another test
svilen-mihaylov-elastic Jan 6, 2025
dc54b64
Remove test with =0 for now.
svilen-mihaylov-elastic Jan 6, 2025
909d90f
Restore test with zero
svilen-mihaylov-elastic Jan 6, 2025
7337f1d
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 6, 2025
13cda1e
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 7, 2025
15bc509
Fix merge
svilen-mihaylov-elastic Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/reference/search/search-your-data/highlighting.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,11 @@ max_analyzed_offset:: By default, the maximum number of characters
analyzed for a highlight request is bounded by the value defined in the
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> setting,
and when the number of characters exceeds this limit an error is returned. If
this setting is set to a non-negative value, the highlighting stops at this defined
this setting is set to a positive value, the highlighting stops at this defined
maximum limit, and the rest of the text is not processed, thus not highlighted and
no error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
no error is returned. If it is specifically set to -1 then the value of
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> is used instead.
Any other non-positive value (0, or any value less than -1) is rejected with an error. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
does *not* override the <<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>>
which prevails when it is set to a lower value than the query setting.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
Expand Down Expand Up @@ -52,7 +53,7 @@ protected List<Object> loadFieldValues(
}

@Override
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
import org.elasticsearch.test.ESTestCase;
Expand Down Expand Up @@ -85,7 +86,7 @@ private void assertHighlightOneDoc(
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
Integer queryMaxAnalyzedOffsetIn
) throws Exception {

try (Directory dir = newDirectory()) {
Expand Down Expand Up @@ -116,8 +117,9 @@ private void assertHighlightOneDoc(
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
Expand Down Expand Up @@ -311,6 +313,19 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
e.getMessage()
);

// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
assertHighlightOneDoc(
"text",
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
query,
Locale.ROOT,
breakIterator,
0,
new String[] { "Long Text [exceeds](_hit_term=exceeds) MAX analyzed offset)" },
20,
-1
);

assertHighlightOneDoc(
"text",
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },
Expand Down
1 change: 1 addition & 0 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
task.skipTest("cat.shards/10_basic/Help", "sync_id is removed in 9.0")
task.skipTest("search/500_date_range/from, to, include_lower, include_upper deprecated", "deprecated parameters are removed in 9.0")
task.skipTest("search.highlight/30_max_analyzed_offset/Plain highlighter with max_analyzed_offset < 0 should FAIL", "semantics of test has changed")
})
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,70 @@ setup:
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}

---
"Plain highlighter with max_analyzed_offset < 0 should FAIL":
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=0 should FAIL":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
catch: bad_request
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 0}}
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }

---
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=1 should SUCCEED":

- requires:
cluster_features: ["gte_v7.12.0"]
reason: max_analyzed_offset query param added in 7.12.0

- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 1}}
- match: { hits.hits.0.highlight: null }

---
"Plain highlighter with max_analyzed_offset = -1 default to index analyze offset should SUCCEED":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": -1}}
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}

---
"Plain highlighter with max_analyzed_offset < -1 should FAIL":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
catch: bad_request
search:
Expand All @@ -130,4 +188,4 @@ setup:
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }
Original file line number Diff line number Diff line change
Expand Up @@ -2632,6 +2632,41 @@ public void testPostingsHighlighterOrderByScore() throws Exception {
});
}

public void testMaxQueryOffsetDefault() throws Exception {
assertAcked(
prepareCreate("test").setMapping(type1PostingsffsetsMapping())
.setSettings(Settings.builder().put("index.highlight.max_analyzed_offset", "10").build())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observe index offset is 10

);
ensureGreen();

prepareIndex("test").setSource(
"field1",
new String[] {
"This sentence contains one match, not that short. This sentence contains zero sentence matches. "
+ "This one contains no matches.",
"This is the second value's first sentence. This one contains no matches. "
+ "This sentence contains three sentence occurrences (sentence).",
"One sentence match here and scored lower since the text is quite long, not that appealing. "
+ "This one contains no matches." }
).get();
refresh();

// Specific for this test: by passing "-1" as "maxAnalyzedOffset", the index highlight setting above will be used.
SearchSourceBuilder source = searchSource().query(termQuery("field1", "sentence"))
.highlighter(highlight().field("field1").order("score").maxAnalyzedOffset(-1));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observe query offset is -1


assertResponse(client().search(new SearchRequest("test").source(source)), response -> {
Map<String, HighlightField> highlightFieldMap = response.getHits().getAt(0).getHighlightFields();
assertThat(highlightFieldMap.size(), equalTo(1));
HighlightField field1 = highlightFieldMap.get("field1");
assertThat(field1.fragments().length, equalTo(1));
assertThat(
field1.fragments()[0].string(),
equalTo("This <em>sentence</em> contains one match, not that short. This sentence contains zero sentence matches.")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observe only one match

);
});
}

public void testPostingsHighlighterEscapeHtml() throws Exception {
assertAcked(prepareCreate("test").setMapping("title", "type=text," + randomStoreField() + "index_options=offsets"));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

CustomFieldHighlighter(
String field,
Expand All @@ -47,7 +47,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator,
int noMatchSize,
Integer queryMaxAnalyzedOffset
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
) {
super(
field,
Expand Down Expand Up @@ -113,7 +113,7 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
    // When a per-query max_analyzed_offset was resolved, cap the offsets
    // enumeration so highlighting stops at that bound; otherwise pass through.
    final OffsetsEnum bounded = queryMaxAnalyzedOffset == null
        ? off
        : new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull());
    return super.highlightOffsetsEnums(bounded);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final int noMatchSize;
private final CustomFieldHighlighter fieldHighlighter;
private final int maxAnalyzedOffset;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

/**
* Creates a new instance of {@link CustomUnifiedHighlighter}
Expand Down Expand Up @@ -94,7 +94,7 @@ public CustomUnifiedHighlighter(
int noMatchSize,
int maxPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
boolean requireFieldMatch,
boolean weightMatchesEnabled
) {
Expand Down Expand Up @@ -125,9 +125,9 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset)
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (getOffsetSource(field) == OffsetSource.ANALYSIS)
&& (fieldValueLength > maxAnalyzedOffset))) {
&& (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ fieldValueLength
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.lucene.search.uhighlight;

/**
 * Holder for the effective per-query {@code max_analyzed_offset} highlighter setting.
 * <p>
 * An instance is only ever created through {@link #create(Integer, int)}, which resolves
 * the user-supplied query value against the index-level maximum: a {@code null} query
 * value yields {@code null} (setting absent), and a negative query value (i.e. {@code -1}
 * after validation upstream) defers to the index-level maximum. A non-null instance
 * therefore always carries a concrete, resolved offset.
 */
public class QueryMaxAnalyzedOffset {
    /** Resolved maximum offset; already substituted with the index maximum when needed. */
    private final int queryMaxAnalyzedOffset;

    private QueryMaxAnalyzedOffset(final int queryMaxAnalyzedOffset) {
        // Plain assignment only — the negative-value substitution happens in create(),
        // not here (previous comment on this constructor was misleading).
        this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
    }

    /**
     * Resolves the query-level {@code max_analyzed_offset} against the index-level maximum.
     *
     * @param queryMaxAnalyzedOffset the raw query setting, or {@code null} when not specified
     * @param indexMaxAnalyzedOffset the index-level {@code index.highlight.max_analyzed_offset}
     * @return {@code null} when the query setting is absent; otherwise an instance holding
     *         the query value, or the index maximum when the query value is negative
     */
    public static QueryMaxAnalyzedOffset create(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) {
        if (queryMaxAnalyzedOffset == null) {
            return null;
        }
        return new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset < 0 ? indexMaxAnalyzedOffset : queryMaxAnalyzedOffset);
    }

    /**
     * Returns the resolved offset. Callers must have null-checked the instance itself;
     * the contained value is always present.
     */
    public int getNotNull() {
        return queryMaxAnalyzedOffset;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ private SearchCapabilities() {}
private static final String OPTIMIZED_SCALAR_QUANTIZATION_BBQ = "optimized_scalar_quantization_bbq";
private static final String KNN_QUANTIZED_VECTOR_RESCORE = "knn_quantized_vector_rescore";

private static final String HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT = "highlight_max_analyzed_offset_default";

public static final Set<String> CAPABILITIES;
static {
HashSet<String> capabilities = new HashSet<>();
Expand All @@ -58,6 +60,7 @@ private SearchCapabilities() {}
if (Build.current().isSnapshot()) {
capabilities.add(KQL_QUERY_SUPPORTED);
}
capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT);
CAPABILITIES = Set.copyOf(capabilities);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -561,13 +561,12 @@ public Integer phraseLimit() {
}

/**
* Set to a non-negative value which represents the max offset used to analyze
* the field thus avoiding exceptions if the field exceeds this limit.
* "maxAnalyzedOffset" might be non-negative int, null (unknown), or a negative int (defaulting to index analyzed offset).
*/
@SuppressWarnings("unchecked")
public HB maxAnalyzedOffset(Integer maxAnalyzedOffset) {
if (maxAnalyzedOffset != null && maxAnalyzedOffset <= 0) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer");
if (maxAnalyzedOffset != null && (maxAnalyzedOffset < -1 || maxAnalyzedOffset == 0)) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer, or -1");
}
this.maxAnalyzedOffset = maxAnalyzedOffset;
return (HB) this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.elasticsearch.lucene.search.uhighlight.CustomPassageFormatter;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -121,7 +122,10 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset();
boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled();
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
Expand Down Expand Up @@ -171,7 +175,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
fieldContext.field.fieldOptions().noMatchSize(),
highlighterNumberOfFragments,
maxAnalyzedOffset,
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
queryMaxAnalyzedOffset,
fieldContext.field.fieldOptions().requireFieldMatch(),
weightMatchesEnabled
);
Expand All @@ -186,9 +190,9 @@ protected PassageFormatter getPassageFormatter(SearchHighlightContext.Field fiel
);
}

protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}
Expand Down
Loading
Loading