
Commit 1b94212

Merge branch 'main' into fix/127519
2 parents 19fa254 + 63b55e4

File tree

15 files changed, +317 -52 lines


docs/changelog/127229.yaml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+pr: 127229
+summary: Return BAD_REQUEST when a field scorer references a missing field
+area: Ranking
+type: bug
+issues:
+ - 127162

docs/changelog/127414.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pr: 127414
+summary: Fix NPE when using source confirmed text query against missing field
+area: Search
+type: bug
+issues: []

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java

Lines changed: 11 additions & 2 deletions
@@ -267,7 +267,11 @@ public boolean isCacheable(LeafReaderContext ctx) {
     @Override
     public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         NumericDocValues norms = context.reader().getNormValues(field);
-        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier(context).get(0);
+        ScorerSupplier scorerSupplier = scorerSupplier(context);
+        if (scorerSupplier == null) {
+            return Explanation.noMatch("No matching phrase");
+        }
+        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier.get(0);
         if (scorer == null) {
             return Explanation.noMatch("No matching phrase");
         }

@@ -277,6 +281,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         }
         float phraseFreq = scorer.freq();
         Explanation freqExplanation = Explanation.match(phraseFreq, "phraseFreq=" + phraseFreq);
+        assert simScorer != null;
         Explanation scoreExplanation = simScorer.explain(freqExplanation, getNormValue(norms, doc));
         return Explanation.match(
             scoreExplanation.getValue(),

@@ -321,7 +326,11 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
             Weight innerWeight = in.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1);
             return innerWeight.matches(context, doc);
         }
-        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier(context).get(0L);
+        ScorerSupplier scorerSupplier = scorerSupplier(context);
+        if (scorerSupplier == null) {
+            return null;
+        }
+        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier.get(0L);
         if (scorer == null) {
             return null;
         }
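
Why the null check is needed: Lucene's Weight#scorerSupplier is allowed to return null when no document in a segment can possibly match, which is exactly what happens when the phrase targets a field that is absent from the index; the old code dereferenced the result unconditionally. A minimal sketch of the guard pattern against the Lucene API (the helper class and method names are illustrative, not from this codebase):

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;

final class ScorerSuppliers {

    private ScorerSuppliers() {}

    // Weight#scorerSupplier returns null when nothing in the segment can
    // match (for example, the queried field does not exist there). Calling
    // get(...) on that null result directly is the NPE this commit fixes.
    static Scorer firstScorerOrNull(Weight weight, LeafReaderContext ctx) throws IOException {
        ScorerSupplier supplier = weight.scorerSupplier(ctx);
        if (supplier == null) {
            return null;
        }
        return supplier.get(0L);
    }
}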

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java

Lines changed: 21 additions & 0 deletions
@@ -24,6 +24,7 @@
 import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Matches;

@@ -101,6 +102,26 @@ public void testTerm() throws Exception {
         }
     }

+    public void testMissingPhrase() throws Exception {
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
+
+            Document doc = new Document();
+            doc.add(new TextField("body", "a b c b a b c", Store.YES));
+            w.addDocument(doc);
+
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                IndexSearcher searcher = newSearcher(reader);
+                PhraseQuery query = new PhraseQuery("missing_field", "b", "c");
+                Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER);
+                Explanation explanation = searcher.explain(sourceConfirmedPhraseQuery, 0);
+                assertFalse(explanation.isMatch());
+
+                Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1);
+                assertNull(weight.matches(getOnlyLeafReader(reader).getContext(), 0));
+            }
+        }
+    }
+
     public void testPhrase() throws Exception {
         try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {

plugins/examples/rescore/src/yamlRestTest/resources/rest-api-spec/test/example-rescore/30_factor_field.yml

Lines changed: 18 additions & 0 deletions
@@ -48,6 +48,24 @@ setup:
   - match: { hits.hits.1._score: 20 }
   - match: { hits.hits.2._score: 10 }

+---
+"referencing a missing field returns bad request":
+  - requires:
+      cluster_features: [ "search.rescorer.missing.field.bad.request" ]
+      reason: "Testing the behaviour change with this feature"
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          rescore:
+            example:
+              factor: 1
+              factor_field: missing
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: "illegal_argument_exception" }
+  - match: { error.root_cause.0.reason: "Missing value for field [missing]" }
+
 ---
 "sorted based on a numeric field and rescored based on a factor field using a window size":
   - do:

server/src/main/java/org/elasticsearch/common/lucene/search/function/FieldValueFactorFunction.java

Lines changed: 1 addition & 2 deletions
@@ -11,7 +11,6 @@

 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Explanation;
-import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;

@@ -73,7 +72,7 @@ public double score(int docId, float subQueryScore) throws IOException {
         if (missing != null) {
             value = missing;
         } else {
-            throw new ElasticsearchException("Missing value for field [" + field + "]");
+            throw new IllegalArgumentException("Missing value for field [" + field + "]");
         }
     }
     double val = value * boostFactor;
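
The behavioural change is carried entirely by the exception type: IllegalArgumentException comes back to the client as HTTP 400 (bad_request), as the YAML test above asserts, whereas the old ElasticsearchException surfaced as a 500. A reduced sketch of the changed branch (the standalone signature is an assumption for illustration, not the real class):

// Reduced from FieldValueFactorFunction#score; docValue stands in for the
// doc-values lookup and the signature is simplified for the example.
final class MissingFieldScoring {

    static double score(Double docValue, Double missing, String field, double boostFactor) {
        final double value;
        if (docValue != null) {
            value = docValue;
        } else if (missing != null) {
            value = missing; // the configured "missing" fallback, when present
        } else {
            // IllegalArgumentException maps to a 400 response; the previous
            // ElasticsearchException surfaced as an internal server error.
            throw new IllegalArgumentException("Missing value for field [" + field + "]");
        }
        return value * boostFactor;
    }
}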

server/src/main/java/org/elasticsearch/search/SearchFeatures.java

Lines changed: 2 additions & 1 deletion
@@ -28,9 +28,10 @@ public Set<NodeFeature> getFeatures() {
     public static final NodeFeature COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS = new NodeFeature(
         "search.completion_field.duplicate.support"
     );
+    public static final NodeFeature RESCORER_MISSING_FIELD_BAD_REQUEST = new NodeFeature("search.rescorer.missing.field.bad.request");

     @Override
     public Set<NodeFeature> getTestFeatures() {
-        return Set.of(RETRIEVER_RESCORER_ENABLED, COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS);
+        return Set.of(RETRIEVER_RESCORER_ENABLED, COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS, RESCORER_MISSING_FIELD_BAD_REQUEST);
     }
 }
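
The feature id declared here is the same string the YAML test above lists under cluster_features, so the test only runs once every node in the cluster advertises the new behaviour. A minimal sketch of that pairing (the holder class name is illustrative; the real declaration is the SearchFeatures change above):

import java.util.Set;

import org.elasticsearch.features.FeatureSpecification;
import org.elasticsearch.features.NodeFeature;

// The string id is the contract with the YAML test:
//   cluster_features: [ "search.rescorer.missing.field.bad.request" ]
public class ExampleFeatures implements FeatureSpecification {

    public static final NodeFeature RESCORER_MISSING_FIELD_BAD_REQUEST =
        new NodeFeature("search.rescorer.missing.field.bad.request");

    @Override
    public Set<NodeFeature> getTestFeatures() {
        // Registering it as a test feature lets gated YAML tests run in CI.
        return Set.of(RESCORER_MISSING_FIELD_BAD_REQUEST);
    }
}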

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/TimeSeriesBlockHash.java

Lines changed: 61 additions & 30 deletions
@@ -30,8 +30,6 @@
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;

-import java.util.Objects;
-
 /**
  * An optimized block hash that receives two blocks: tsid and timestamp, which are sorted.
  * Since the incoming data is sorted, this block hash appends the incoming data to the internal arrays without lookup.

@@ -41,7 +39,7 @@ public final class TimeSeriesBlockHash extends BlockHash {
     private final int tsHashChannel;
     private final int timestampIntervalChannel;

-    private final BytesRef lastTsid = new BytesRef();
+    private int lastTsidPosition = 0;
     private final BytesRefArrayWithSize tsidArray;

     private long lastTimestamp;

@@ -64,44 +62,77 @@ public void close() {
         Releasables.close(tsidArray, timestampArray, perTsidCountArray);
     }

+    private OrdinalBytesRefVector getTsidVector(Page page) {
+        BytesRefBlock block = page.getBlock(tsHashChannel);
+        var ordinalBlock = block.asOrdinals();
+        if (ordinalBlock == null) {
+            throw new IllegalStateException("expected ordinal block for tsid");
+        }
+        var ordinalVector = ordinalBlock.asVector();
+        if (ordinalVector == null) {
+            throw new IllegalStateException("expected ordinal vector for tsid");
+        }
+        return ordinalVector;
+    }
+
+    private LongVector getTimestampVector(Page page) {
+        final LongBlock timestampsBlock = page.getBlock(timestampIntervalChannel);
+        LongVector timestampsVector = timestampsBlock.asVector();
+        if (timestampsVector == null) {
+            throw new IllegalStateException("expected long vector for timestamp");
+        }
+        return timestampsVector;
+    }
+
     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
-        final BytesRefBlock tsidBlock = page.getBlock(tsHashChannel);
-        final BytesRefVector tsidVector = Objects.requireNonNull(tsidBlock.asVector(), "tsid input must be a vector");
-        final LongBlock timestampBlock = page.getBlock(timestampIntervalChannel);
-        final LongVector timestampVector = Objects.requireNonNull(timestampBlock.asVector(), "timestamp input must be a vector");
-        try (var ordsBuilder = blockFactory.newIntVectorBuilder(tsidVector.getPositionCount())) {
+        final BytesRefVector tsidDict;
+        final IntVector tsidOrdinals;
+        {
+            final var tsidVector = getTsidVector(page);
+            tsidDict = tsidVector.getDictionaryVector();
+            tsidOrdinals = tsidVector.getOrdinalsVector();
+        }
+        try (var ordsBuilder = blockFactory.newIntVectorBuilder(tsidOrdinals.getPositionCount())) {
             final BytesRef spare = new BytesRef();
-            // TODO: optimize incoming ordinal block
-            for (int i = 0; i < tsidVector.getPositionCount(); i++) {
-                final BytesRef tsid = tsidVector.getBytesRef(i, spare);
+            final BytesRef lastTsid = new BytesRef();
+            final LongVector timestampVector = getTimestampVector(page);
+            int lastOrd = -1;
+            for (int i = 0; i < tsidOrdinals.getPositionCount(); i++) {
+                final int newOrd = tsidOrdinals.getInt(i);
+                boolean newGroup = false;
+                if (lastOrd != newOrd) {
+                    final var newTsid = tsidDict.getBytesRef(newOrd, spare);
+                    if (positionCount() == 0) {
+                        newGroup = true;
+                    } else if (lastOrd == -1) {
+                        tsidArray.get(lastTsidPosition, lastTsid);
+                        newGroup = lastTsid.equals(newTsid) == false;
+                    } else {
+                        newGroup = true;
+                    }
+                    if (newGroup) {
+                        endTsidGroup();
+                        lastTsidPosition = tsidArray.count;
+                        tsidArray.append(newTsid);
+                    }
+                    lastOrd = newOrd;
+                }
                 final long timestamp = timestampVector.getLong(i);
-                ordsBuilder.appendInt(addOnePosition(tsid, timestamp));
+                if (newGroup || timestamp != lastTimestamp) {
+                    assert newGroup || lastTimestamp >= timestamp : "@timestamp goes backward " + lastTimestamp + " < " + timestamp;
+                    timestampArray.append(timestamp);
+                    lastTimestamp = timestamp;
+                    currentTimestampCount++;
+                }
+                ordsBuilder.appendInt(timestampArray.count - 1);
             }
             try (var ords = ordsBuilder.build()) {
                 addInput.add(0, ords);
             }
         }
     }

-    private int addOnePosition(BytesRef tsid, long timestamp) {
-        boolean newGroup = false;
-        if (positionCount() == 0 || lastTsid.equals(tsid) == false) {
-            assert positionCount() == 0 || lastTsid.compareTo(tsid) < 0 : "tsid goes backward ";
-            endTsidGroup();
-            tsidArray.append(tsid);
-            tsidArray.get(tsidArray.count - 1, lastTsid);
-            newGroup = true;
-        }
-        if (newGroup || timestamp != lastTimestamp) {
-            assert newGroup || lastTimestamp >= timestamp : "@timestamp goes backward " + lastTimestamp + " < " + timestamp;
-            timestampArray.append(timestamp);
-            lastTimestamp = timestamp;
-            currentTimestampCount++;
-        }
-        return positionCount() - 1;
-    }
-
     private void endTsidGroup() {
         if (currentTimestampCount > 0) {
             perTsidCountArray.append(currentTimestampCount);
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ public BytesRef getBytesRef(int valueIndex, BytesRef dest) {
     }

     @Override
-    public BytesRefVector asVector() {
+    public OrdinalBytesRefVector asVector() {
         IntVector vector = ordinals.asVector();
         if (vector != null) {
             return new OrdinalBytesRefVector(vector, bytes);
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TimeSeriesAggregationOperator.java

Lines changed: 14 additions & 11 deletions
@@ -14,6 +14,7 @@
 import org.elasticsearch.compute.aggregation.GroupingAggregatorEvaluationContext;
 import org.elasticsearch.compute.aggregation.TimeSeriesGroupingAggregatorEvaluationContext;
 import org.elasticsearch.compute.aggregation.blockhash.BlockHash;
+import org.elasticsearch.compute.aggregation.blockhash.TimeSeriesBlockHash;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.LongBlock;

@@ -30,6 +31,7 @@ public class TimeSeriesAggregationOperator extends HashAggregationOperator {

     public record Factory(
         Rounding.Prepared timeBucket,
+        boolean sortedInput,
         List<BlockHash.GroupSpec> groups,
         AggregatorMode aggregatorMode,
         List<GroupingAggregator.Factory> aggregators,

@@ -38,17 +40,18 @@ public record Factory(
         @Override
         public Operator get(DriverContext driverContext) {
             // TODO: use TimeSeriesBlockHash when possible
-            return new TimeSeriesAggregationOperator(
-                timeBucket,
-                aggregators,
-                () -> BlockHash.build(
-                    groups,
-                    driverContext.blockFactory(),
-                    maxPageSize,
-                    true // we can enable optimizations as the inputs are vectors
-                ),
-                driverContext
-            );
+            return new TimeSeriesAggregationOperator(timeBucket, aggregators, () -> {
+                if (sortedInput && groups.size() == 2) {
+                    return new TimeSeriesBlockHash(groups.get(0).channel(), groups.get(1).channel(), driverContext.blockFactory());
+                } else {
+                    return BlockHash.build(
+                        groups,
+                        driverContext.blockFactory(),
+                        maxPageSize,
+                        true // we can enable optimizations as the inputs are vectors
+                    );
+                }
+            }, driverContext);
         }

         @Override