Skip to content

Commit c2fdc06

Browse files
authored
ESQL: Fix sneaky bug in single value query (#127146)
Fixes a sneaky bug in single value query that happens when run against a `keyword` field that: * Is defined on every field * Contains the same number of distinct values as documents The simplest way to reproduce this is to build a single shard index with two documents: ``` {"a": "foo"} {"a": ["foo", "bar"]} ``` I don't think this is particularly likely in production, but it's quite likely in tests. Which is where I hit this - in the serverless tests we index an index with four documents into three shards and two of the documents look just like this. So about 1/3 or the time we triggered this bug. Mechanically this is triggered by the `SingleValueMatchQuery` incorrectly rewriting itself to `MatchAll` in the scenario above. This fixes that.
1 parent a5f935a commit c2fdc06

File tree

3 files changed

+67
-40
lines changed

3 files changed

+67
-40
lines changed

docs/changelog/127146.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127146
2+
summary: Fix sneaky bug in single value query
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/querydsl/query/SingleValueMatchQuery.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
219219
}
220220
} else if (lfd instanceof LeafOrdinalsFieldData) {
221221
final Terms terms = context.reader().terms(fieldData.getFieldName());
222-
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.size() != terms.getDocCount()) {
222+
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.getSumDocFreq() != terms.getDocCount()) {
223223
return super.rewrite(indexSearcher);
224224
}
225225
} else {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java

Lines changed: 61 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,11 @@ interface Setup {
4848
void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException;
4949
}
5050

51-
@ParametersFactory
51+
@ParametersFactory(argumentFormatting = "%s")
5252
public static List<Object[]> params() {
5353
List<Object[]> params = new ArrayList<>();
5454
for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) {
55+
params.add(new Object[] { new SneakyTwo(fieldType) });
5556
for (boolean multivaluedField : new boolean[] { true, false }) {
5657
for (boolean allowEmpty : new boolean[] { true, false }) {
5758
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
@@ -129,13 +130,13 @@ public XContentBuilder mapping(XContentBuilder builder) throws IOException {
129130

130131
@Override
131132
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
132-
List<List<Object>> fieldValues = new ArrayList<>(100);
133+
List<List<Object>> docs = new ArrayList<>(count);
133134
for (int i = 0; i < count; i++) {
134135
List<Object> values = values(i);
135-
fieldValues.add(values);
136+
docs.add(values);
136137
iw.addDocument(docFor(values));
137138
}
138-
return fieldValues;
139+
return docs;
139140
}
140141

141142
@Override
@@ -154,53 +155,74 @@ private List<Object> values(int i) {
154155
int count = between(2, 10);
155156
Set<Object> set = new HashSet<>(count);
156157
while (set.size() < count) {
157-
set.add(randomValue());
158+
set.add(randomValue(fieldType));
158159
}
159160
return List.copyOf(set);
160161
}
161162
// i == 0 forces at least one empty field when we're configured for empty fields
162163
if (empty && (i == 0 || randomBoolean())) {
163164
return List.of();
164165
}
165-
return List.of(randomValue());
166+
return List.of(randomValue(fieldType));
166167
}
168+
}
167169

168-
private Object randomValue() {
169-
return switch (fieldType) {
170-
case "long" -> randomLong();
171-
case "integer" -> randomInt();
172-
case "short" -> randomShort();
173-
case "byte" -> randomByte();
174-
case "double" -> randomDouble();
175-
case "float" -> randomFloat();
176-
case "keyword" -> randomAlphaOfLength(5);
177-
default -> throw new UnsupportedOperationException();
178-
};
170+
/**
171+
* Tests a scenario where we were incorrectly rewriting {@code keyword} fields to
172+
* {@link MatchAllDocsQuery} when:
173+
* <ul>
174+
* <li>Is defined on every field</li>
175+
* <li>Contains the same number of distinct values as documents</li>
176+
* </ul>
177+
*/
178+
private record SneakyTwo(String fieldType) implements Setup {
179+
@Override
180+
public XContentBuilder mapping(XContentBuilder builder) throws IOException {
181+
return builder.startObject("foo").field("type", fieldType).endObject();
179182
}
180183

181-
private List<IndexableField> docFor(Iterable<Object> values) {
182-
List<IndexableField> fields = new ArrayList<>();
183-
switch (fieldType) {
184-
case "long", "integer", "short", "byte" -> {
185-
for (Object v : values) {
186-
long l = ((Number) v).longValue();
187-
fields.add(new LongField("foo", l, Field.Store.NO));
188-
}
189-
}
190-
case "double", "float" -> {
191-
for (Object v : values) {
192-
double d = ((Number) v).doubleValue();
193-
fields.add(new DoubleField("foo", d, Field.Store.NO));
194-
}
195-
}
196-
case "keyword" -> {
197-
for (Object v : values) {
198-
fields.add(new KeywordField("foo", v.toString(), Field.Store.NO));
199-
}
200-
}
184+
@Override
185+
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
186+
Object first = randomValue(fieldType);
187+
Object second = randomValue(fieldType);
188+
List<Object> justFirst = List.of(first);
189+
List<Object> both = List.of(first, second);
190+
iw.addDocument(docFor(justFirst));
191+
iw.addDocument(docFor(both));
192+
return List.of(justFirst, both);
193+
}
194+
195+
@Override
196+
public void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException {
197+
// There are multivalued fields
198+
assertThat(query.rewrite(indexSearcher), sameInstance(query));
199+
}
200+
}
201+
202+
private static Object randomValue(String fieldType) {
203+
return switch (fieldType) {
204+
case "long" -> randomLong();
205+
case "integer" -> randomInt();
206+
case "short" -> randomShort();
207+
case "byte" -> randomByte();
208+
case "double" -> randomDouble();
209+
case "float" -> randomFloat();
210+
case "keyword" -> randomAlphaOfLength(5);
211+
default -> throw new UnsupportedOperationException();
212+
};
213+
}
214+
215+
private static List<IndexableField> docFor(Iterable<Object> values) {
216+
List<IndexableField> fields = new ArrayList<>();
217+
for (Object v : values) {
218+
fields.add(switch (v) {
219+
case Double n -> new DoubleField("foo", n, Field.Store.NO);
220+
case Float n -> new DoubleField("foo", n, Field.Store.NO);
221+
case Number n -> new LongField("foo", n.longValue(), Field.Store.NO);
222+
case String s -> new KeywordField("foo", s, Field.Store.NO);
201223
default -> throw new UnsupportedOperationException();
202-
}
203-
return fields;
224+
});
204225
}
226+
return fields;
205227
}
206228
}

0 commit comments

Comments
 (0)