Skip to content

Commit a86f070

Browse files
authored
[8.18] ESQL: Fix sneaky bug in single value query (#127146) (#127176)
* ESQL: Fix sneaky bug in single value query (#127146) Fixes a sneaky bug in single value query that happens when run against a `keyword` field that: * Is defined on every field * Contains the same number of distinct values as documents The simplest way to reproduce this is to build a single shard index with two documents: ``` {"a": "foo"} {"a": ["foo", "bar"]} ``` I don't think this is particularly likely in production, but it's quite likely in tests. Which is where I hit this - in the serverless tests we index an index with four documents into three shards and two of the documents look just like this. So about 1/3 or the time we triggered this bug. Mechanically this is triggered by the `SingleValueMatchQuery` incorrectly rewriting itself to `MatchAll` in the scenario above. This fixes that. * Revert "Switch" This reverts commit cc7805d.
1 parent c272dda commit a86f070

File tree

3 files changed

+72
-40
lines changed

3 files changed

+72
-40
lines changed

docs/changelog/127146.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127146
2+
summary: Fix sneaky bug in single value query
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/querydsl/query/SingleValueMatchQuery.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
230230
}
231231
} else if (lfd instanceof LeafOrdinalsFieldData) {
232232
final Terms terms = context.reader().terms(fieldData.getFieldName());
233-
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.size() != terms.getDocCount()) {
233+
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.getSumDocFreq() != terms.getDocCount()) {
234234
return super.rewrite(indexSearcher);
235235
}
236236
} else {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,11 @@ interface Setup {
4848
void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException;
4949
}
5050

51-
@ParametersFactory
51+
@ParametersFactory(argumentFormatting = "%s")
5252
public static List<Object[]> params() {
5353
List<Object[]> params = new ArrayList<>();
5454
for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) {
55+
params.add(new Object[] { new SneakyTwo(fieldType) });
5556
for (boolean multivaluedField : new boolean[] { true, false }) {
5657
for (boolean allowEmpty : new boolean[] { true, false }) {
5758
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
@@ -127,13 +128,13 @@ public XContentBuilder mapping(XContentBuilder builder) throws IOException {
127128

128129
@Override
129130
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
130-
List<List<Object>> fieldValues = new ArrayList<>(100);
131+
List<List<Object>> docs = new ArrayList<>(count);
131132
for (int i = 0; i < count; i++) {
132133
List<Object> values = values(i);
133-
fieldValues.add(values);
134+
docs.add(values);
134135
iw.addDocument(docFor(values));
135136
}
136-
return fieldValues;
137+
return docs;
137138
}
138139

139140
@Override
@@ -152,53 +153,79 @@ private List<Object> values(int i) {
152153
int count = between(2, 10);
153154
Set<Object> set = new HashSet<>(count);
154155
while (set.size() < count) {
155-
set.add(randomValue());
156+
set.add(randomValue(fieldType));
156157
}
157158
return List.copyOf(set);
158159
}
159160
// i == 0 forces at least one empty field when we're configured for empty fields
160161
if (empty && (i == 0 || randomBoolean())) {
161162
return List.of();
162163
}
163-
return List.of(randomValue());
164+
return List.of(randomValue(fieldType));
164165
}
166+
}
165167

166-
private Object randomValue() {
167-
return switch (fieldType) {
168-
case "long" -> randomLong();
169-
case "integer" -> randomInt();
170-
case "short" -> randomShort();
171-
case "byte" -> randomByte();
172-
case "double" -> randomDouble();
173-
case "float" -> randomFloat();
174-
case "keyword" -> randomAlphaOfLength(5);
175-
default -> throw new UnsupportedOperationException();
176-
};
168+
/**
169+
* Tests a scenario where we were incorrectly rewriting {@code keyword} fields to
170+
* {@link MatchAllDocsQuery} when:
171+
* <ul>
172+
* <li>Is defined on every field</li>
173+
* <li>Contains the same number of distinct values as documents</li>
174+
* </ul>
175+
*/
176+
private record SneakyTwo(String fieldType) implements Setup {
177+
@Override
178+
public XContentBuilder mapping(XContentBuilder builder) throws IOException {
179+
return builder.startObject("foo").field("type", fieldType).endObject();
177180
}
178181

179-
private List<IndexableField> docFor(Iterable<Object> values) {
180-
List<IndexableField> fields = new ArrayList<>();
181-
switch (fieldType) {
182-
case "long", "integer", "short", "byte" -> {
183-
for (Object v : values) {
184-
long l = ((Number) v).longValue();
185-
fields.add(new LongField("foo", l, Field.Store.NO));
186-
}
187-
}
188-
case "double", "float" -> {
189-
for (Object v : values) {
190-
double d = ((Number) v).doubleValue();
191-
fields.add(new DoubleField("foo", d, Field.Store.NO));
192-
}
193-
}
194-
case "keyword" -> {
195-
for (Object v : values) {
196-
fields.add(new KeywordField("foo", v.toString(), Field.Store.NO));
197-
}
198-
}
199-
default -> throw new UnsupportedOperationException();
182+
@Override
183+
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
184+
Object first = randomValue(fieldType);
185+
Object second = randomValue(fieldType);
186+
List<Object> justFirst = List.of(first);
187+
List<Object> both = List.of(first, second);
188+
iw.addDocument(docFor(justFirst));
189+
iw.addDocument(docFor(both));
190+
return List.of(justFirst, both);
191+
}
192+
193+
@Override
194+
public void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException {
195+
// There are multivalued fields
196+
assertThat(query.rewrite(indexSearcher), sameInstance(query));
197+
}
198+
}
199+
200+
private static Object randomValue(String fieldType) {
201+
return switch (fieldType) {
202+
case "long" -> randomLong();
203+
case "integer" -> randomInt();
204+
case "short" -> randomShort();
205+
case "byte" -> randomByte();
206+
case "double" -> randomDouble();
207+
case "float" -> randomFloat();
208+
case "keyword" -> randomAlphaOfLength(5);
209+
default -> throw new UnsupportedOperationException();
210+
};
211+
}
212+
213+
private static List<IndexableField> docFor(Iterable<Object> values) {
214+
List<IndexableField> fields = new ArrayList<>();
215+
for (Object v : values) {
216+
if (v instanceof Double n) {
217+
fields.add(new DoubleField("foo", n, Field.Store.NO));
218+
} else if (v instanceof Float n) {
219+
fields.add(new DoubleField("foo", n, Field.Store.NO));
220+
} else if (v instanceof Number n) {
221+
long l = n.longValue();
222+
fields.add(new LongField("foo", l, Field.Store.NO));
223+
} else if (v instanceof String s) {
224+
fields.add(new KeywordField("foo", v.toString(), Field.Store.NO));
225+
} else {
226+
throw new UnsupportedOperationException();
200227
}
201-
return fields;
202228
}
229+
return fields;
203230
}
204231
}

0 commit comments

Comments
 (0)