Skip to content

Commit 1fb6206

Browse files
authored
SignificantText aggregation had include/exclude logic back to front (#64520) (#64538)
Backport of a bugfix: the SignificantText aggregation applied its include/exclude logic back to front (terms that passed the filter were skipped instead of kept). Added a test. Closes #64519
1 parent 4851bc7 commit 1fb6206

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ private void processTokenStream(
256256
scratch.clear();
257257
scratch.copyChars(termAtt);
258258
BytesRef bytes = scratch.get();
259-
if (includeExclude != null && includeExclude.accept(bytes)) {
259+
if (includeExclude != null && false == includeExclude.accept(bytes)) {
260260
continue;
261261
}
262262
if (inDocTerms.add(bytes) < 0) {

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,65 @@ public void testSignificance() throws IOException {
144144
}
145145
}
146146

147+
/**
148+
* Verifies that the significant text aggregation honours include/exclude term filters: with an include list only the listed term is returned as a bucket, and with an exclude list the listed term is omitted while other terms remain.
149+
*/
150+
public void testIncludeExcludes() throws IOException {
151+
TextFieldType textFieldType = new TextFieldType("text");
152+
textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
153+
154+
IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
155+
indexWriterConfig.setMaxBufferedDocs(100);
156+
indexWriterConfig.setRAMBufferSizeMB(100); // presumably sized so nothing flushes before the reader is opened, giving the single segment asserted below - TODO confirm
157+
try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
158+
indexDocuments(w);
159+
160+
String [] incExcValues = {"duplicate"};
161+
162+
try (IndexReader reader = DirectoryReader.open(w)) {
163+
assertEquals("test expects a single segment", 1, reader.leaves().size());
164+
IndexSearcher searcher = new IndexSearcher(reader);
165+
166+
// Include list: incExcValues is passed as the include set, so only "duplicate" may come back as a bucket
167+
{
168+
SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text").
169+
includeExclude(new IncludeExclude(incExcValues, null));
170+
SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
171+
.subAggregation(sigAgg);
172+
if(randomBoolean()){
173+
sigAgg.sourceFieldNames(Arrays.asList(new String [] {"json_only_field"}));
174+
}
175+
// Query for docs containing "even"; NOTE(review): these docs are expected to also contain "duplicate" - confirm in indexDocuments
176+
InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
177+
SignificantTerms terms = sampler.getAggregations().get("sig_text");
178+
179+
assertNull(terms.getBucketByKey("even"));
180+
assertNotNull(terms.getBucketByKey("duplicate"));
181+
assertTrue(AggregationInspectionHelper.hasValue(sampler));
182+
183+
}
184+
// Exclude list: incExcValues is passed as the exclude set, so "duplicate" must not come back as a bucket
185+
{
186+
SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text").
187+
includeExclude(new IncludeExclude(null, incExcValues));
188+
SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
189+
.subAggregation(sigAgg);
190+
if(randomBoolean()){
191+
sigAgg.sourceFieldNames(Arrays.asList(new String [] {"json_only_field"}));
192+
}
193+
// Same "even" query as the include case; here "even" should survive and "duplicate" should be filtered out
194+
InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
195+
SignificantTerms terms = sampler.getAggregations().get("sig_text");
196+
197+
assertNotNull(terms.getBucketByKey("even"));
198+
assertNull(terms.getBucketByKey("duplicate"));
199+
assertTrue(AggregationInspectionHelper.hasValue(sampler));
200+
201+
}
202+
}
203+
}
204+
}
205+
147206
public void testFieldAlias() throws IOException {
148207
TextFieldType textFieldType = new TextFieldType("text");
149208
textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

0 commit comments

Comments
 (0)