Skip to content

Commit 65ac607

Browse files
committed
Change IllegalStateException for IllegalArgumentException in NoisyChannelSpellChecker, so we get a bad request instead of an internal server error
1 parent dd3e3c9 commit 65ac607

File tree

2 files changed

+147
-1
lines changed

2 files changed

+147
-1
lines changed
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.search.suggest.phrase;
11+
12+
import org.elasticsearch.ExceptionsHelper;
13+
import org.elasticsearch.action.search.SearchPhaseExecutionException;
14+
import org.elasticsearch.action.search.SearchRequestBuilder;
15+
import org.elasticsearch.common.settings.Settings;
16+
import org.elasticsearch.rest.RestStatus;
17+
import org.elasticsearch.search.suggest.SuggestBuilder;
18+
import org.elasticsearch.test.ESIntegTestCase;
19+
import org.elasticsearch.xcontent.XContentFactory;
20+
21+
import java.io.IOException;
22+
23+
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
24+
import static org.elasticsearch.search.suggest.SuggestBuilders.phraseSuggestion;
25+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
26+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
27+
import static org.hamcrest.Matchers.equalTo;
28+
29+
public class PhraseSuggesterIT extends ESIntegTestCase {
30+
31+
/**
32+
* Reproduces the IllegalArgumentException: "At least one unigram is required but all tokens were ngrams"
33+
*
34+
* This happens when:
35+
* 1. A phrase suggester is configured to use an analyzer that only produces n-grams (no unigrams)
36+
* 2. The NoisyChannelSpellChecker is created with requireUnigram=true (which is the default)
37+
* 3. The input text is analyzed and all resulting tokens are marked as n-grams
38+
* 4. The NoisyChannelSpellChecker throws an IllegalArgumentException because it expects at least one unigram
39+
*/
40+
public void testPhraseSuggestionWithNgramOnlyAnalyzerThrowsException() throws IOException {
41+
createIndexAndDocs(false);
42+
43+
// Create a phrase suggestion that uses the ngram-only field
44+
// This should trigger the IllegalArgumentException because:
45+
// 1. The "text.ngrams" field uses an analyzer that only produces n-grams
46+
// 2. When "hello world" is analyzed, it produces only n-grams, but no unigrams
47+
// 3. The DirectCandidateGenerator.analyze() method sets anyTokens=true but anyUnigram=false
48+
// 4. NoisyChannelSpellChecker.end() throws IllegalArgumentException
49+
SearchRequestBuilder searchBuilder = createSuggesterSearch("text.ngrams");
50+
51+
// This should throw SearchPhaseExecutionException wrapping IllegalArgumentException
52+
SearchPhaseExecutionException exception = expectThrows(SearchPhaseExecutionException.class, searchBuilder);
53+
assertNotNull(exception.getCause());
54+
assertThat(exception.status(), equalTo(RestStatus.BAD_REQUEST));
55+
Throwable rootCause = ExceptionsHelper.unwrap(exception, IllegalArgumentException.class);
56+
57+
assertTrue(
58+
"Expected IllegalArgumentException but got: " + rootCause.getClass().getSimpleName(),
59+
rootCause instanceof IllegalArgumentException
60+
);
61+
assertEquals("At least one unigram is required but all tokens were ngrams", rootCause.getMessage());
62+
}
63+
64+
private static SearchRequestBuilder createSuggesterSearch(String fieldName) {
65+
PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion(fieldName).text("hello world")
66+
.addCandidateGenerator(new DirectCandidateGeneratorBuilder("text").suggestMode("always").minWordLength(1).maxEdits(2));
67+
68+
SearchRequestBuilder searchBuilder = prepareSearch("test").setSize(0)
69+
.suggest(new SuggestBuilder().addSuggestion("test_suggestion", phraseSuggestion));
70+
return searchBuilder;
71+
}
72+
73+
/**
74+
* Demonstrates that the same configuration works fine when using a different field that produces unigrams
75+
*/
76+
public void testPhraseSuggestionWithUnigramFieldWorks() throws IOException {
77+
createIndexAndDocs(false);
78+
79+
// Use the main "text" field instead of "text.ngrams" - this should work fine
80+
// because the standard analyzer produces unigrams
81+
SearchRequestBuilder searchRequestBuilder = createSuggesterSearch("text");
82+
83+
// This should NOT throw an exception
84+
assertNoFailuresAndResponse(searchRequestBuilder, response -> {
85+
// Just verify we get a response without exceptions
86+
assertNotNull(response.getSuggest());
87+
});
88+
}
89+
90+
/**
91+
* Test showing the same ngram-only configuration works when shingle filter allows output_unigrams=true
92+
*/
93+
public void testPhraseSuggestionWithNgramsAndUnigramsWorks() throws IOException {
94+
createIndexAndDocs(true);
95+
96+
// Use the ngrams field, but this time it should work because the analyzer produces unigrams too
97+
SearchRequestBuilder searchRequestBuilder = createSuggesterSearch("text.ngrams");
98+
99+
// This should NOT throw an exception because unigrams are available
100+
assertNoFailuresAndResponse(searchRequestBuilder, response -> { assertNotNull(response.getSuggest()); });
101+
}
102+
103+
private void createIndexAndDocs(boolean allowUnigrams) throws IOException {
104+
// Create an index with a shingle analyzer that outputs NO unigrams (only n-grams)
105+
assertAcked(
106+
prepareCreate("test").setSettings(
107+
Settings.builder()
108+
.put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5))
109+
.put("index.analysis.analyzer.ngram_only.tokenizer", "standard")
110+
.putList("index.analysis.analyzer.ngram_only.filter", "my_shingle", "lowercase")
111+
.put("index.analysis.filter.my_shingle.type", "shingle")
112+
.put("index.analysis.filter.my_shingle.output_unigrams", allowUnigrams)
113+
.put("index.analysis.filter.my_shingle.min_shingle_size", 2)
114+
.put("index.analysis.filter.my_shingle.max_shingle_size", 3)
115+
)
116+
.setMapping(
117+
XContentFactory.jsonBuilder()
118+
.startObject()
119+
.startObject("_doc")
120+
.startObject("properties")
121+
.startObject("text")
122+
.field("type", "text")
123+
.field("analyzer", "standard")
124+
.startObject("fields")
125+
.startObject("ngrams")
126+
.field("type", "text")
127+
.field("analyzer", "ngram_only") // Use our ngram-only analyzer for suggestions
128+
.endObject()
129+
.endObject()
130+
.endObject()
131+
.endObject()
132+
.endObject()
133+
.endObject()
134+
)
135+
);
136+
137+
ensureGreen();
138+
139+
// Index some test documents
140+
indexDoc("test", "1", "text", "hello world test");
141+
indexDoc("test", "2", "text", "another test phrase");
142+
indexDoc("test", "3", "text", "some more content");
143+
refresh();
144+
}
145+
146+
}

server/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public void end() {
8787
candidateSetsList.add(currentSet);
8888
}
8989
if (requireUnigram && anyUnigram == false && anyTokens) {
90-
throw new IllegalStateException("At least one unigram is required but all tokens were ngrams");
90+
throw new IllegalArgumentException("At least one unigram is required but all tokens were ngrams");
9191
}
9292
}
9393
});

0 commit comments

Comments
 (0)