
Commit 1b94212

Merge branch 'main' into fix/127519
2 parents 19fa254 + 63b55e4

File tree

15 files changed, +317 -52 lines


docs/changelog/127229.yaml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+pr: 127229
+summary: Return BAD_REQUEST when a field scorer references a missing field
+area: Ranking
+type: bug
+issues:
+ - 127162

docs/changelog/127414.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pr: 127414
+summary: Fix NPE when using source confirmed text query against missing field
+area: Search
+type: bug
+issues: []

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java

Lines changed: 11 additions & 2 deletions
@@ -267,7 +267,11 @@ public boolean isCacheable(LeafReaderContext ctx) {
     @Override
     public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         NumericDocValues norms = context.reader().getNormValues(field);
-        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier(context).get(0);
+        ScorerSupplier scorerSupplier = scorerSupplier(context);
+        if (scorerSupplier == null) {
+            return Explanation.noMatch("No matching phrase");
+        }
+        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier.get(0);
         if (scorer == null) {
             return Explanation.noMatch("No matching phrase");
         }

@@ -277,6 +281,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         }
         float phraseFreq = scorer.freq();
         Explanation freqExplanation = Explanation.match(phraseFreq, "phraseFreq=" + phraseFreq);
+        assert simScorer != null;
         Explanation scoreExplanation = simScorer.explain(freqExplanation, getNormValue(norms, doc));
         return Explanation.match(
             scoreExplanation.getValue(),

@@ -321,7 +326,11 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
             Weight innerWeight = in.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1);
             return innerWeight.matches(context, doc);
         }
-        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier(context).get(0L);
+        ScorerSupplier scorerSupplier = scorerSupplier(context);
+        if (scorerSupplier == null) {
+            return null;
+        }
+        RuntimePhraseScorer scorer = (RuntimePhraseScorer) scorerSupplier.get(0L);
         if (scorer == null) {
             return null;
         }
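
Why the null check is needed: Lucene's Weight#scorerSupplier is allowed to return null when no document in a segment can possibly match, which is exactly what happens when the phrase targets a field that is absent from the index; the old code dereferenced the result unconditionally. A minimal sketch of the guard pattern against the Lucene API (the helper class and method names are illustrative, not from this codebase):

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;

final class ScorerSuppliers {

    private ScorerSuppliers() {}

    // Weight#scorerSupplier returns null when nothing in the segment can
    // match (for example, the queried field does not exist there). Calling
    // get(...) on that null result directly is the NPE this commit fixes.
    static Scorer firstScorerOrNull(Weight weight, LeafReaderContext ctx) throws IOException {
        ScorerSupplier supplier = weight.scorerSupplier(ctx);
        if (supplier == null) {
            return null;
        }
        return supplier.get(0L);
    }
}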

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java

Lines changed: 21 additions & 0 deletions
@@ -24,6 +24,7 @@
 import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Matches;

@@ -101,6 +102,26 @@ public void testTerm() throws Exception {
         }
     }

+    public void testMissingPhrase() throws Exception {
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
+
+            Document doc = new Document();
+            doc.add(new TextField("body", "a b c b a b c", Store.YES));
+            w.addDocument(doc);
+
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                IndexSearcher searcher = newSearcher(reader);
+                PhraseQuery query = new PhraseQuery("missing_field", "b", "c");
+                Query sourceConfirmedPhraseQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER);
+                Explanation explanation = searcher.explain(sourceConfirmedPhraseQuery, 0);
+                assertFalse(explanation.isMatch());
+
+                Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1);
+                assertNull(weight.matches(getOnlyLeafReader(reader).getContext(), 0));
+            }
+        }
+    }
+
     public void testPhrase() throws Exception {
         try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {

plugins/examples/rescore/src/yamlRestTest/resources/rest-api-spec/test/example-rescore/30_factor_field.yml

Lines changed: 18 additions & 0 deletions
@@ -48,6 +48,24 @@ setup:
   - match: { hits.hits.1._score: 20 }
   - match: { hits.hits.2._score: 10 }

+---
+"referencing a missing field returns bad request":
+  - requires:
+      cluster_features: [ "search.rescorer.missing.field.bad.request" ]
+      reason: "Testing the behaviour change with this feature"
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          rescore:
+            example:
+              factor: 1
+              factor_field: missing
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: "illegal_argument_exception" }
+  - match: { error.root_cause.0.reason: "Missing value for field [missing]" }
+
 ---
 "sorted based on a numeric field and rescored based on a factor field using a window size":
   - do:

server/src/main/java/org/elasticsearch/common/lucene/search/function/FieldValueFactorFunction.java

Lines changed: 1 addition & 2 deletions
@@ -11,7 +11,6 @@

 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Explanation;
-import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;

@@ -73,7 +72,7 @@ public double score(int docId, float subQueryScore) throws IOException {
         if (missing != null) {
             value = missing;
         } else {
-            throw new ElasticsearchException("Missing value for field [" + field + "]");
+            throw new IllegalArgumentException("Missing value for field [" + field + "]");
         }
     }
     double val = value * boostFactor;
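
The behavioural change is carried entirely by the exception type: IllegalArgumentException comes back to the client as HTTP 400 (bad_request), as the YAML test above asserts, whereas the old ElasticsearchException surfaced as a 500. A reduced sketch of the changed branch (the standalone signature is an assumption for illustration, not the real class):

// Reduced from FieldValueFactorFunction#score; docValue stands in for the
// doc-values lookup and the signature is simplified for the example.
final class MissingFieldScoring {

    static double score(Double docValue, Double missing, String field, double boostFactor) {
        final double value;
        if (docValue != null) {
            value = docValue;
        } else if (missing != null) {
            value = missing; // the configured "missing" fallback, when present
        } else {
            // IllegalArgumentException maps to a 400 response; the previous
            // ElasticsearchException surfaced as an internal server error.
            throw new IllegalArgumentException("Missing value for field [" + field + "]");
        }
        return value * boostFactor;
    }
}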

server/src/main/java/org/elasticsearch/search/SearchFeatures.java

Lines changed: 2 additions & 1 deletion
@@ -28,9 +28,10 @@ public Set<NodeFeature> getFeatures() {
     public static final NodeFeature COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS = new NodeFeature(
         "search.completion_field.duplicate.support"
     );
+    public static final NodeFeature RESCORER_MISSING_FIELD_BAD_REQUEST = new NodeFeature("search.rescorer.missing.field.bad.request");

     @Override
     public Set<NodeFeature> getTestFeatures() {
-        return Set.of(RETRIEVER_RESCORER_ENABLED, COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS);
+        return Set.of(RETRIEVER_RESCORER_ENABLED, COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS, RESCORER_MISSING_FIELD_BAD_REQUEST);
     }
 }
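
The feature id declared here is the same string the YAML test above lists under cluster_features, so the test only runs once every node in the cluster advertises the new behaviour. A minimal sketch of that pairing (the holder class name is illustrative; the real declaration is the SearchFeatures change above):

import java.util.Set;

import org.elasticsearch.features.FeatureSpecification;
import org.elasticsearch.features.NodeFeature;

// The string id is the contract with the YAML test:
//   cluster_features: [ "search.rescorer.missing.field.bad.request" ]
public class ExampleFeatures implements FeatureSpecification {

    public static final NodeFeature RESCORER_MISSING_FIELD_BAD_REQUEST =
        new NodeFeature("search.rescorer.missing.field.bad.request");

    @Override
    public Set<NodeFeature> getTestFeatures() {
        // Registering it as a test feature lets gated YAML tests run in CI.
        return Set.of(RESCORER_MISSING_FIELD_BAD_REQUEST);
    }
}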

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/TimeSeriesBlockHash.java

Lines changed: 61 additions & 30 deletions
@@ -30,8 +30,6 @@
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;

-import java.util.Objects;
-
 /**
  * An optimized block hash that receives two blocks: tsid and timestamp, which are sorted.
  * Since the incoming data is sorted, this block hash appends the incoming data to the internal arrays without lookup.

@@ -41,7 +39,7 @@ public final class TimeSeriesBlockHash extends BlockHash {
     private final int tsHashChannel;
     private final int timestampIntervalChannel;

-    private final BytesRef lastTsid = new BytesRef();
+    private int lastTsidPosition = 0;
     private final BytesRefArrayWithSize tsidArray;

     private long lastTimestamp;

@@ -64,44 +62,77 @@ public void close() {
         Releasables.close(tsidArray, timestampArray, perTsidCountArray);
     }

+    private OrdinalBytesRefVector getTsidVector(Page page) {
+        BytesRefBlock block = page.getBlock(tsHashChannel);
+        var ordinalBlock = block.asOrdinals();
+        if (ordinalBlock == null) {
+            throw new IllegalStateException("expected ordinal block for tsid");
+        }
+        var ordinalVector = ordinalBlock.asVector();
+        if (ordinalVector == null) {
+            throw new IllegalStateException("expected ordinal vector for tsid");
+        }
+        return ordinalVector;
+    }
+
+    private LongVector getTimestampVector(Page page) {
+        final LongBlock timestampsBlock = page.getBlock(timestampIntervalChannel);
+        LongVector timestampsVector = timestampsBlock.asVector();
+        if (timestampsVector == null) {
+            throw new IllegalStateException("expected long vector for timestamp");
+        }
+        return timestampsVector;
+    }
+
     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
-        final BytesRefBlock tsidBlock = page.getBlock(tsHashChannel);
-        final BytesRefVector tsidVector = Objects.requireNonNull(tsidBlock.asVector(), "tsid input must be a vector");
-        final LongBlock timestampBlock = page.getBlock(timestampIntervalChannel);
-        final LongVector timestampVector = Objects.requireNonNull(timestampBlock.asVector(), "timestamp input must be a vector");
-        try (var ordsBuilder = blockFactory.newIntVectorBuilder(tsidVector.getPositionCount())) {
+        final BytesRefVector tsidDict;
+        final IntVector tsidOrdinals;
+        {
+            final var tsidVector = getTsidVector(page);
+            tsidDict = tsidVector.getDictionaryVector();
+            tsidOrdinals = tsidVector.getOrdinalsVector();
+        }
+        try (var ordsBuilder = blockFactory.newIntVectorBuilder(tsidOrdinals.getPositionCount())) {
             final BytesRef spare = new BytesRef();
-            // TODO: optimize incoming ordinal block
-            for (int i = 0; i < tsidVector.getPositionCount(); i++) {
-                final BytesRef tsid = tsidVector.getBytesRef(i, spare);
+            final BytesRef lastTsid = new BytesRef();
+            final LongVector timestampVector = getTimestampVector(page);
+            int lastOrd = -1;
+            for (int i = 0; i < tsidOrdinals.getPositionCount(); i++) {
+                final int newOrd = tsidOrdinals.getInt(i);
+                boolean newGroup = false;
+                if (lastOrd != newOrd) {
+                    final var newTsid = tsidDict.getBytesRef(newOrd, spare);
+                    if (positionCount() == 0) {
+                        newGroup = true;
+                    } else if (lastOrd == -1) {
+                        tsidArray.get(lastTsidPosition, lastTsid);
+                        newGroup = lastTsid.equals(newTsid) == false;
+                    } else {
+                        newGroup = true;
+                    }
+                    if (newGroup) {
+                        endTsidGroup();
+                        lastTsidPosition = tsidArray.count;
+                        tsidArray.append(newTsid);
+                    }
+                    lastOrd = newOrd;
+                }
                 final long timestamp = timestampVector.getLong(i);
-                ordsBuilder.appendInt(addOnePosition(tsid, timestamp));
+                if (newGroup || timestamp != lastTimestamp) {
+                    assert newGroup || lastTimestamp >= timestamp : "@timestamp goes backward " + lastTimestamp + " < " + timestamp;
+                    timestampArray.append(timestamp);
+                    lastTimestamp = timestamp;
+                    currentTimestampCount++;
+                }
+                ordsBuilder.appendInt(timestampArray.count - 1);
             }
             try (var ords = ordsBuilder.build()) {
                 addInput.add(0, ords);
             }
         }
     }

-    private int addOnePosition(BytesRef tsid, long timestamp) {
-        boolean newGroup = false;
-        if (positionCount() == 0 || lastTsid.equals(tsid) == false) {
-            assert positionCount() == 0 || lastTsid.compareTo(tsid) < 0 : "tsid goes backward ";
-            endTsidGroup();
-            tsidArray.append(tsid);
-            tsidArray.get(tsidArray.count - 1, lastTsid);
-            newGroup = true;
-        }
-        if (newGroup || timestamp != lastTimestamp) {
-            assert newGroup || lastTimestamp >= timestamp : "@timestamp goes backward " + lastTimestamp + " < " + timestamp;
-            timestampArray.append(timestamp);
-            lastTimestamp = timestamp;
-            currentTimestampCount++;
-        }
-        return positionCount() - 1;
-    }
-
     private void endTsidGroup() {
         if (currentTimestampCount > 0) {
             perTsidCountArray.append(currentTimestampCount);
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ public BytesRef getBytesRef(int valueIndex, BytesRef dest) {
     }

     @Override
-    public BytesRefVector asVector() {
+    public OrdinalBytesRefVector asVector() {
         IntVector vector = ordinals.asVector();
         if (vector != null) {
             return new OrdinalBytesRefVector(vector, bytes);
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/TimeSeriesAggregationOperator.java

Lines changed: 14 additions & 11 deletions
@@ -14,6 +14,7 @@
 import org.elasticsearch.compute.aggregation.GroupingAggregatorEvaluationContext;
 import org.elasticsearch.compute.aggregation.TimeSeriesGroupingAggregatorEvaluationContext;
 import org.elasticsearch.compute.aggregation.blockhash.BlockHash;
+import org.elasticsearch.compute.aggregation.blockhash.TimeSeriesBlockHash;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.LongBlock;

@@ -30,6 +31,7 @@ public class TimeSeriesAggregationOperator extends HashAggregationOperator {

     public record Factory(
         Rounding.Prepared timeBucket,
+        boolean sortedInput,
         List<BlockHash.GroupSpec> groups,
         AggregatorMode aggregatorMode,
         List<GroupingAggregator.Factory> aggregators,

@@ -38,17 +40,18 @@ public record Factory(
         @Override
         public Operator get(DriverContext driverContext) {
             // TODO: use TimeSeriesBlockHash when possible
-            return new TimeSeriesAggregationOperator(
-                timeBucket,
-                aggregators,
-                () -> BlockHash.build(
-                    groups,
-                    driverContext.blockFactory(),
-                    maxPageSize,
-                    true // we can enable optimizations as the inputs are vectors
-                ),
-                driverContext
-            );
+            return new TimeSeriesAggregationOperator(timeBucket, aggregators, () -> {
+                if (sortedInput && groups.size() == 2) {
+                    return new TimeSeriesBlockHash(groups.get(0).channel(), groups.get(1).channel(), driverContext.blockFactory());
+                } else {
+                    return BlockHash.build(
+                        groups,
+                        driverContext.blockFactory(),
+                        maxPageSize,
+                        true // we can enable optimizations as the inputs are vectors
+                    );
+                }
+            }, driverContext);
         }

         @Override