Skip to content

Commit a441675

Browse files
committed
Fixed SignificantTerms on nested fields not finding any background documents
1 parent 0b807f4 commit a441675

File tree

2 files changed

+174
-29
lines changed

2 files changed

+174
-29
lines changed

modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml

Lines changed: 159 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,24 @@ setup:
1010
number_of_shards: "1"
1111
mappings:
1212
properties:
13+
value:
14+
type: integer
15+
value_keyword:
16+
type: keyword
1317
nested:
1418
type: nested
1519
properties:
16-
type:
17-
type: keyword
1820
value:
1921
type: integer
22+
value_keyword:
23+
type: keyword
24+
nested:
25+
type: nested
26+
properties:
27+
value:
28+
type: integer
29+
value_keyword:
30+
type: keyword
2031

2132
# Docs with type "normal" have many "1" values and just one "2". Docs with type "outlier" have the same number of "1" and "2" values.
2233
- do:
@@ -25,38 +36,38 @@ setup:
2536
refresh: true
2637
body:
2738
- '{ "index": {} }'
28-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
39+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
2940
- '{ "index": {} }'
30-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
41+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
3142
- '{ "index": {} }'
32-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
43+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
3344
- '{ "index": {} }'
34-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
45+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
3546
- '{ "index": {} }'
36-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
47+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
3748
- '{ "index": {} }'
38-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
49+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
3950
- '{ "index": {} }'
40-
- '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }'
51+
- '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
4152
- '{ "index": {} }'
42-
- '{ "type": "normal", "value": 2, "nested": { "type": "normal", "value": 2 } }'
53+
- '{ "type": "normal", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }'
4354

4455
- '{ "index": {} }'
45-
- '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }'
56+
- '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
4657
- '{ "index": {} }'
47-
- '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }'
58+
- '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
4859
- '{ "index": {} }'
49-
- '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }'
60+
- '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
5061
- '{ "index": {} }'
51-
- '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }'
62+
- '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }'
5263
- '{ "index": {} }'
53-
- '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }'
64+
- '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }'
5465
- '{ "index": {} }'
55-
- '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }'
66+
- '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }'
5667
- '{ "index": {} }'
57-
- '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }'
68+
- '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }'
5869
- '{ "index": {} }'
59-
- '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }'
70+
- '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }'
6071

6172
---
6273
"Data checks":
@@ -68,6 +79,7 @@ setup:
6879

6980
- do:
7081
search:
82+
size: 0
7183
rest_total_hits_as_int: true
7284
index: test
7385
body: {
@@ -77,6 +89,11 @@ setup:
7789
"field": "value"
7890
}
7991
},
92+
"value_keyword_terms": {
93+
"terms": {
94+
"field": "value_keyword"
95+
}
96+
},
8097
"nested": {
8198
"nested": {
8299
"path": "nested"
@@ -86,33 +103,79 @@ setup:
86103
"terms": {
87104
"field": "nested.value"
88105
}
106+
},
107+
"nested_value_keyword_terms": {
108+
"terms": {
109+
"field": "nested.value_keyword"
110+
}
111+
},
112+
"nested": {
113+
"nested": {
114+
"path": "nested.nested"
115+
},
116+
"aggs": {
117+
"nested_value_terms": {
118+
"terms": {
119+
"field": "nested.nested.value"
120+
}
121+
},
122+
"nested_value_keyword_terms": {
123+
"terms": {
124+
"field": "nested.nested.value_keyword"
125+
}
126+
}
127+
}
89128
}
90129
}
91130
}
92131
}
93132
}
94133

134+
# Check value
95135
- match: {aggregations.value_terms.buckets.0.key: 1}
96136
- match: {aggregations.value_terms.buckets.0.doc_count: 11}
97137
- match: {aggregations.value_terms.buckets.1.key: 2}
98138
- match: {aggregations.value_terms.buckets.1.doc_count: 5}
99139

140+
# Check value_keyword
141+
- match: {aggregations.value_keyword_terms.buckets.0.key: "1"}
142+
- match: {aggregations.value_keyword_terms.buckets.0.doc_count: 11}
143+
- match: {aggregations.value_keyword_terms.buckets.1.key: "2"}
144+
- match: {aggregations.value_keyword_terms.buckets.1.doc_count: 5}
145+
146+
# Nested
100147
- match: {aggregations.nested.doc_count: 16}
148+
# Check nested value
101149
- match: {aggregations.nested.nested_value_terms.buckets.0.key: 1}
102150
- match: {aggregations.nested.nested_value_terms.buckets.0.doc_count: 11}
103151
- match: {aggregations.nested.nested_value_terms.buckets.1.key: 2}
104152
- match: {aggregations.nested.nested_value_terms.buckets.1.doc_count: 5}
105153

154+
# Check nested value_keyword
155+
- match: {aggregations.nested.nested_value_keyword_terms.buckets.0.key: "1"}
156+
- match: {aggregations.nested.nested_value_keyword_terms.buckets.0.doc_count: 11}
157+
- match: {aggregations.nested.nested_value_keyword_terms.buckets.1.key: "2"}
158+
- match: {aggregations.nested.nested_value_keyword_terms.buckets.1.doc_count: 5}
159+
160+
# Nested>nested
161+
- match: {aggregations.nested.nested.doc_count: 16}
162+
# Check nested>nested value
163+
- match: {aggregations.nested.nested.nested_value_terms.buckets.0.key: 1}
164+
- match: {aggregations.nested.nested.nested_value_terms.buckets.0.doc_count: 11}
165+
- match: {aggregations.nested.nested.nested_value_terms.buckets.1.key: 2}
166+
- match: {aggregations.nested.nested.nested_value_terms.buckets.1.doc_count: 5}
167+
168+
# Check nested>nested value_keyword
169+
- match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.key: "1"}
170+
- match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.doc_count: 11}
171+
- match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.key: "2"}
172+
- match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.doc_count: 5}
173+
106174
---
107175
"Normal fields":
108176
- do:
109177
search:
110-
rest_total_hits_as_int: true
111-
index: test
112-
- match: {hits.total: 16}
113-
114-
- do:
115-
search:
178+
size: 0
116179
rest_total_hits_as_int: true
117180
index: test
118181
body: {
@@ -124,6 +187,11 @@ setup:
124187
"significant_terms": {
125188
"field": "value"
126189
}
190+
},
191+
"significant_terms_keyword": {
192+
"significant_terms": {
193+
"field": "value_keyword"
194+
}
127195
}
128196
}
129197
}
@@ -136,16 +204,67 @@ setup:
136204
- match: {aggregations.significant_terms.buckets.0.bg_count: 5}
137205
- close_to: { aggregations.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }}
138206

207+
- match: {aggregations.significant_terms_keyword.doc_count: 8}
208+
- match: {aggregations.significant_terms_keyword.bg_count: 16}
209+
- length: {aggregations.significant_terms_keyword.buckets: 1}
210+
- match: {aggregations.significant_terms_keyword.buckets.0.key: "2"}
211+
- match: {aggregations.significant_terms_keyword.buckets.0.doc_count: 4}
212+
- match: {aggregations.significant_terms_keyword.buckets.0.bg_count: 5}
213+
- close_to: { aggregations.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }}
214+
139215
---
140216
"Nested fields":
141217
- do:
142218
search:
219+
size: 0
143220
rest_total_hits_as_int: true
144221
index: test
145-
- match: {hits.total: 16}
222+
body: {
223+
"query": {
224+
"terms": { "type": [ "outlier" ] }
225+
},
226+
"aggs": {
227+
"nested": {
228+
"nested": {
229+
"path": "nested"
230+
},
231+
"aggs": {
232+
"significant_terms": {
233+
"significant_terms": {
234+
"field": "nested.value"
235+
}
236+
},
237+
"significant_terms_keyword": {
238+
"significant_terms": {
239+
"field": "nested.value_keyword"
240+
}
241+
}
242+
}
243+
}
244+
}
245+
}
146246

247+
- match: {aggregations.nested.significant_terms.doc_count: 8}
248+
- match: {aggregations.nested.significant_terms.bg_count: 16}
249+
- length: {aggregations.nested.significant_terms.buckets: 1}
250+
- match: {aggregations.nested.significant_terms.buckets.0.key: 2}
251+
- match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4}
252+
- match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5}
253+
- close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }}
254+
255+
- match: {aggregations.nested.significant_terms_keyword.doc_count: 8}
256+
- match: {aggregations.nested.significant_terms_keyword.bg_count: 16}
257+
- length: {aggregations.nested.significant_terms_keyword.buckets: 1}
258+
- match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"}
259+
- match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4}
260+
- match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5}
261+
- close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }}
262+
263+
---
264+
"Doubly nested fields":
147265
- do:
148266
search:
267+
size: 0
149268
rest_total_hits_as_int: true
150269
index: test
151270
body: {
@@ -155,12 +274,17 @@ setup:
155274
"aggs": {
156275
"nested": {
157276
"nested": {
158-
"path": "nested"
277+
"path": "nested.nested"
159278
},
160279
"aggs": {
161280
"significant_terms": {
162281
"significant_terms": {
163-
"field": "nested.value"
282+
"field": "nested.nested.value"
283+
}
284+
},
285+
"significant_terms_keyword": {
286+
"significant_terms": {
287+
"field": "nested.nested.value_keyword"
164288
}
165289
}
166290
}
@@ -175,3 +299,11 @@ setup:
175299
- match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4}
176300
- match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5}
177301
- close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }}
302+
303+
- match: {aggregations.nested.significant_terms_keyword.doc_count: 8}
304+
- match: {aggregations.nested.significant_terms_keyword.bg_count: 16}
305+
- length: {aggregations.nested.significant_terms_keyword.buckets: 1}
306+
- match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"}
307+
- match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4}
308+
- match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5}
309+
- close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.apache.lucene.search.MatchAllDocsQuery;
2020
import org.apache.lucene.search.Query;
2121
import org.apache.lucene.search.TermQuery;
22+
import org.apache.lucene.search.join.ScoreMode;
2223
import org.apache.lucene.util.BytesRef;
2324
import org.elasticsearch.common.lucene.index.FilterableTermsEnum;
2425
import org.elasticsearch.common.util.BigArrays;
@@ -28,6 +29,7 @@
2829
import org.elasticsearch.core.Releasable;
2930
import org.elasticsearch.core.Releasables;
3031
import org.elasticsearch.index.mapper.MappedFieldType;
32+
import org.elasticsearch.index.query.NestedQueryBuilder;
3133
import org.elasticsearch.index.query.QueryBuilder;
3234
import org.elasticsearch.index.query.TermQueryBuilder;
3335
import org.elasticsearch.search.DocValueFormat;
@@ -159,7 +161,7 @@ public void close() {
159161
* Get the background frequency of a {@link BytesRef} term.
160162
*/
161163
private long getBackgroundFrequency(BytesRef term) throws IOException {
162-
return getBackgroundFrequency(context.buildQuery(new TermQueryBuilder(fieldType.name(), format.format(term).toString())));
164+
return getBackgroundFrequency(context.buildQuery(makeBackgroundFrequencyQuery(format.format(term).toString())));
163165
}
164166

165167
/**
@@ -214,7 +216,18 @@ public void close() {
214216
* Get the background frequency of a {@code long} term.
215217
*/
216218
private long getBackgroundFrequency(long term) throws IOException {
217-
return getBackgroundFrequency(context.buildQuery(new TermQueryBuilder(fieldType.name(), format.format(term).toString())));
219+
return getBackgroundFrequency(context.buildQuery(makeBackgroundFrequencyQuery(format.format(term).toString())));
220+
}
221+
222+
/**
 * Builds the query builder that the {@code getBackgroundFrequency} overloads pass to
 * {@code context.buildQuery(...)} for a single term.
 * <p>
 * A term query on this lookup's field is always created. When the nested lookup reports a
 * nested parent for the field, the term query is wrapped in a nested query on that parent path.
 *
 * @param value the already formatted term value to match
 * @return the term query, wrapped in a {@link NestedQueryBuilder} when the field has a nested parent
 */
private QueryBuilder makeBackgroundFrequencyQuery(String value) {
223+
// Presumably null when the field is not inside a nested object; the null check below relies on that.
var nestedParentField = context.nestedLookup().getNestedParent(fieldType.name());
224+
QueryBuilder queryBuilder = new TermQueryBuilder(fieldType.name(), value);
225+
226+
if (nestedParentField != null) {
227+
// NOTE(review): ScoreMode.Avg is passed here; if callers only count matching documents,
// ScoreMode.None may be sufficient. Confirm against getBackgroundFrequency(Query).
queryBuilder = new NestedQueryBuilder(nestedParentField, queryBuilder, ScoreMode.Avg);
228+
}
229+
230+
return queryBuilder;
218231
}
219232

220233
private long getBackgroundFrequency(Query query) throws IOException {

0 commit comments

Comments
 (0)