Skip to content

Commit 29ff32e

Browse files
authored
Less jank in after-key parsing for unmapped fields (#86359) (#97282)
Resolves #85928. The after-key parsing is pretty weird, and there are probably more bugs there. I did not take the opportunity to refactor the whole thing, but we should. This fixes the immediate problem by treating after keys as bytes refs when we don't have a field but think we want a keyword. We were already doing that if the user asked for a missing bucket; this just extends the behavior to the case where the user didn't. Long term, the terms Composite source (and probably other Composite sources) should have specializations for unmapped fields. That's the direction we want to take aggs in general.
1 parent 22ca339 commit 29ff32e

File tree

3 files changed

+98
-1
lines changed

3 files changed

+98
-1
lines changed

docs/changelog/97282.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 97282
2+
summary: Less jank in after-key parsing for unmapped fields
3+
area: Aggregations
4+
type: bug
5+
issues:
6+
- 85928

server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/GlobalOrdinalValuesSource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ void setAfter(Comparable<?> value) {
105105
if (missingBucket && value == null) {
106106
afterValue = null;
107107
afterValueGlobalOrd = MISSING_VALUE_FLAG;
108-
} else if (value.getClass() == String.class || (missingBucket && fieldType == null)) {
108+
} else if (value.getClass() == String.class || (fieldType == null)) {
109109
// the value might not be a string if this field is missing in this shard but present in other shards
110110
// and doesn't have a string type
111111
afterValue = format.parseBytesRef(value.toString());

server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,16 @@ public void testUnmappedFieldWithTerms() throws Exception {
151151
createDocument("keyword", "c")
152152
)
153153
);
154+
155+
// Only aggregate on unmapped field, no missing bucket => no results
154156
testSearchCase(
155157
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
156158
dataset,
157159
() -> new CompositeAggregationBuilder("name", Arrays.asList(new TermsValuesSourceBuilder("unmapped").field("unmapped"))),
158160
(result) -> { assertEquals(0, result.getBuckets().size()); }
159161
);
160162

163+
// Only aggregate on unmapped field, missing bucket => one null bucket with all values
161164
testSearchCase(
162165
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
163166
dataset,
@@ -173,6 +176,7 @@ public void testUnmappedFieldWithTerms() throws Exception {
173176
}
174177
);
175178

179+
// Only aggregate on the unmapped field, after key for that field is set as `null` => no results
176180
testSearchCase(
177181
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
178182
dataset,
@@ -183,6 +187,7 @@ public void testUnmappedFieldWithTerms() throws Exception {
183187
(result) -> { assertEquals(0, result.getBuckets().size()); }
184188
);
185189

190+
// Mapped field first, then unmapped, no missing bucket => no results
186191
testSearchCase(
187192
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
188193
dataset,
@@ -196,6 +201,7 @@ public void testUnmappedFieldWithTerms() throws Exception {
196201
(result) -> { assertEquals(0, result.getBuckets().size()); }
197202
);
198203

204+
// Mapped + unmapped, include missing => 3 buckets
199205
testSearchCase(
200206
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
201207
dataset,
@@ -217,6 +223,91 @@ public void testUnmappedFieldWithTerms() throws Exception {
217223
assertEquals(1L, result.getBuckets().get(2).getDocCount());
218224
}
219225
);
226+
227+
// Unmapped field, keyword after key, unmapped sorts after, include unmapped => 1 bucket
228+
testSearchCase(
229+
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
230+
dataset,
231+
() -> new CompositeAggregationBuilder(
232+
"name",
233+
Arrays.asList(
234+
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.LAST)
235+
)
236+
).aggregateAfter(Collections.singletonMap("unmapped", "cat")),
237+
(InternalComposite result) -> {
238+
assertEquals(1, result.getBuckets().size());
239+
assertEquals("{unmapped=null}", result.afterKey().toString());
240+
assertEquals("{unmapped=null}", result.getBuckets().get(0).getKeyAsString());
241+
assertEquals(5L, result.getBuckets().get(0).getDocCount());
242+
}
243+
);
244+
245+
// Unmapped field, keyword after key, unmapped sorts before, include unmapped => 0 buckets
246+
testSearchCase(
247+
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
248+
dataset,
249+
() -> new CompositeAggregationBuilder(
250+
"name",
251+
Arrays.asList(
252+
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.FIRST)
253+
)
254+
).aggregateAfter(Collections.singletonMap("unmapped", "cat")),
255+
(InternalComposite result) -> { assertEquals(0, result.getBuckets().size()); }
256+
);
257+
258+
// Unmapped field, number after key, unmapped sorts after, include unmapped => 1 bucket
259+
testSearchCase(
260+
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
261+
dataset,
262+
() -> new CompositeAggregationBuilder(
263+
"name",
264+
Arrays.asList(
265+
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.LAST)
266+
)
267+
).aggregateAfter(Collections.singletonMap("unmapped", 42)),
268+
(InternalComposite result) -> {
269+
assertEquals(1, result.getBuckets().size());
270+
assertEquals("{unmapped=null}", result.afterKey().toString());
271+
assertEquals("{unmapped=null}", result.getBuckets().get(0).getKeyAsString());
272+
assertEquals(5L, result.getBuckets().get(0).getDocCount());
273+
}
274+
);
275+
276+
// Unmapped field, number after key, unmapped sorts before, include unmapped => 0 buckets
277+
testSearchCase(
278+
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
279+
dataset,
280+
() -> new CompositeAggregationBuilder(
281+
"name",
282+
Arrays.asList(
283+
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true).missingOrder(MissingOrder.FIRST)
284+
)
285+
).aggregateAfter(Collections.singletonMap("unmapped", 42)),
286+
(InternalComposite result) -> { assertEquals(0, result.getBuckets().size()); }
287+
);
288+
289+
}
290+
291+
public void testUnmappedTermsLongAfter() throws Exception {
292+
final List<Map<String, List<Object>>> dataset = new ArrayList<>();
293+
dataset.addAll(
294+
Arrays.asList(
295+
createDocument("keyword", "a"),
296+
createDocument("keyword", "c"),
297+
createDocument("keyword", "a"),
298+
createDocument("keyword", "d"),
299+
createDocument("keyword", "c")
300+
)
301+
);
302+
303+
// Unmapped field, number after key, no missing bucket => 0 buckets
304+
testSearchCase(
305+
Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")),
306+
dataset,
307+
() -> new CompositeAggregationBuilder("name", Arrays.asList(new TermsValuesSourceBuilder("unmapped").field("unmapped")))
308+
.aggregateAfter(Collections.singletonMap("unmapped", 42)),
309+
(InternalComposite result) -> { assertEquals(0, result.getBuckets().size()); }
310+
);
220311
}
221312

222313
public void testUnmappedFieldWithGeopoint() throws Exception {

0 commit comments

Comments
 (0)