Skip to content

Commit 079cff5

Browse files
authored
Avoid internal server error on suggester ngram bad request (#132321)
1 parent 75ac0ee commit 079cff5

File tree

3 files changed

+152
-1
lines changed

3 files changed

+152
-1
lines changed

docs/changelog/132321.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 132321
2+
summary: Avoid internal server error when suggester requires unigrams but no unigrams are provided, return bad request instead
3+
area: Analysis
4+
type: bug
5+
issues:
6+
- 131928
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.search.suggest.phrase;
11+
12+
import org.elasticsearch.action.search.SearchRequestBuilder;
13+
import org.elasticsearch.action.search.ShardSearchFailure;
14+
import org.elasticsearch.common.settings.Settings;
15+
import org.elasticsearch.rest.RestStatus;
16+
import org.elasticsearch.search.suggest.SuggestBuilder;
17+
import org.elasticsearch.test.ESIntegTestCase;
18+
import org.elasticsearch.xcontent.XContentFactory;
19+
20+
import java.io.IOException;
21+
22+
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
23+
import static org.elasticsearch.search.suggest.SuggestBuilders.phraseSuggestion;
24+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
25+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
26+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
27+
import static org.hamcrest.Matchers.equalTo;
28+
import static org.hamcrest.Matchers.greaterThan;
29+
30+
public class PhraseSuggesterIT extends ESIntegTestCase {
31+
32+
/**
33+
* Reproduces the IllegalArgumentException: "At least one unigram is required but all tokens were ngrams"
34+
*
35+
* This happens when:
36+
* 1. A phrase suggester is configured to use an analyzer that only produces n-grams (no unigrams)
37+
* 2. The NoisyChannelSpellChecker is created with requireUnigram=true (which is the default)
38+
* 3. The input text is analyzed and all resulting tokens are marked as n-grams
39+
* 4. The NoisyChannelSpellChecker throws an IllegalArgumentException because it expects at least one unigram
40+
*/
41+
public void testPhraseSuggestionWithNgramOnlyAnalyzerThrowsException() throws IOException {
42+
createIndexAndDocs(false);
43+
44+
// Create a phrase suggestion that uses the ngram-only field
45+
// This should trigger the IllegalArgumentException because:
46+
// 1. The "text.ngrams" field uses an analyzer that only produces n-grams
47+
// 2. When "hello world" is analyzed, it produces only n-grams, but no unigrams
48+
// 3. The DirectCandidateGenerator.analyze() method sets anyTokens=true but anyUnigram=false
49+
// 4. NoisyChannelSpellChecker.end() throws IllegalArgumentException
50+
SearchRequestBuilder searchBuilder = createSuggesterSearch("text.ngrams");
51+
52+
assertResponse(searchBuilder, response -> {
53+
assertThat(response.status(), equalTo(RestStatus.OK));
54+
assertThat(response.getFailedShards(), greaterThan(0));
55+
assertThat(response.getShardFailures().length, greaterThan(0));
56+
for (ShardSearchFailure shardFailure : response.getShardFailures()) {
57+
assertTrue(shardFailure.getCause() instanceof IllegalArgumentException);
58+
assertEquals("At least one unigram is required but all tokens were ngrams", shardFailure.getCause().getMessage());
59+
}
60+
});
61+
}
62+
63+
private static SearchRequestBuilder createSuggesterSearch(String fieldName) {
64+
PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion(fieldName).text("hello world")
65+
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text").suggestMode("always").minWordLength(1).maxEdits(2));
66+
67+
SearchRequestBuilder searchBuilder = prepareSearch("test").setSize(0)
68+
.suggest(new SuggestBuilder().addSuggestion("test_suggestion", phraseSuggestion));
69+
return searchBuilder;
70+
}
71+
72+
/**
73+
* Demonstrates that the same configuration works fine when using a different field that produces unigrams
74+
*/
75+
public void testPhraseSuggestionWithUnigramFieldWorks() throws IOException {
76+
createIndexAndDocs(false);
77+
78+
// Use the main "text" field instead of "text.ngrams" - this should work fine
79+
// because the standard analyzer produces unigrams
80+
SearchRequestBuilder searchRequestBuilder = createSuggesterSearch("text");
81+
82+
// This should NOT throw an exception
83+
assertNoFailuresAndResponse(searchRequestBuilder, response -> {
84+
// Just verify we get a response without exceptions
85+
assertNotNull(response.getSuggest());
86+
});
87+
}
88+
89+
/**
90+
* Test showing the same ngram-only configuration works when shingle filter allows output_unigrams=true
91+
*/
92+
public void testPhraseSuggestionWithNgramsAndUnigramsWorks() throws IOException {
93+
createIndexAndDocs(true);
94+
95+
// Use the ngrams field, but this time it should work because the analyzer produces unigrams too
96+
SearchRequestBuilder searchRequestBuilder = createSuggesterSearch("text.ngrams");
97+
98+
// This should NOT throw an exception because unigrams are available
99+
assertNoFailuresAndResponse(searchRequestBuilder, response -> { assertNotNull(response.getSuggest()); });
100+
}
101+
102+
private void createIndexAndDocs(boolean outputUnigrams) throws IOException {
103+
// Create an index with a shingle analyzer that outputs unigrams or not
104+
assertAcked(
105+
prepareCreate("test").setSettings(
106+
Settings.builder()
107+
.put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5))
108+
.put("index.analysis.analyzer.ngram_only.tokenizer", "standard")
109+
.putList("index.analysis.analyzer.ngram_only.filter", "my_shingle", "lowercase")
110+
.put("index.analysis.filter.my_shingle.type", "shingle")
111+
.put("index.analysis.filter.my_shingle.output_unigrams", outputUnigrams)
112+
.put("index.analysis.filter.my_shingle.min_shingle_size", 2)
113+
.put("index.analysis.filter.my_shingle.max_shingle_size", 3)
114+
)
115+
.setMapping(
116+
XContentFactory.jsonBuilder()
117+
.startObject()
118+
.startObject("_doc")
119+
.startObject("properties")
120+
.startObject("text")
121+
.field("type", "text")
122+
.field("analyzer", "standard")
123+
.startObject("fields")
124+
.startObject("ngrams")
125+
.field("type", "text")
126+
.field("analyzer", "ngram_only") // Use our ngram-only analyzer for suggestions
127+
.endObject()
128+
.endObject()
129+
.endObject()
130+
.endObject()
131+
.endObject()
132+
.endObject()
133+
)
134+
);
135+
136+
ensureGreen();
137+
138+
// Index some test documents
139+
indexDoc("test", "1", "text", "hello world test");
140+
indexDoc("test", "2", "text", "another test phrase");
141+
indexDoc("test", "3", "text", "some more content");
142+
refresh();
143+
}
144+
145+
}

server/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public void end() {
8787
candidateSetsList.add(currentSet);
8888
}
8989
if (requireUnigram && anyUnigram == false && anyTokens) {
90-
throw new IllegalStateException("At least one unigram is required but all tokens were ngrams");
90+
throw new IllegalArgumentException("At least one unigram is required but all tokens were ngrams");
9191
}
9292
}
9393
});

0 commit comments

Comments
 (0)