/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.search.suggest.phrase;

import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentFactory;

import java.io.IOException;

import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.search.suggest.SuggestBuilders.phraseSuggestion;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;

public class PhraseSuggesterIT extends ESIntegTestCase {

    /**
     * Reproduces the IllegalArgumentException: "At least one unigram is required but all tokens were ngrams"
     *
     * This happens when:
     * 1. A phrase suggester is configured to use an analyzer that only produces n-grams (no unigrams)
     * 2. The NoisyChannelSpellChecker is created with requireUnigram=true (which is the default)
     * 3. The input text is analyzed and all resulting tokens are marked as n-grams
     * 4. The NoisyChannelSpellChecker throws an IllegalArgumentException because it expects at least one unigram
     */
    public void testPhraseSuggestionWithNgramOnlyAnalyzerThrowsException() throws IOException {
        createIndexAndDocs(false);

        // Create a phrase suggestion that uses the ngram-only field.
        // This should trigger the IllegalArgumentException because:
        // 1. The "text.ngrams" field uses an analyzer that only produces n-grams
        // 2. When "hello world" is analyzed, it produces only n-grams, but no unigrams
        // 3. The DirectCandidateGenerator.analyze() method sets anyTokens=true but anyUnigram=false
        // 4. NoisyChannelSpellChecker.end() throws IllegalArgumentException
        SearchRequestBuilder searchBuilder = createSuggesterSearch("text.ngrams");

        assertResponse(searchBuilder, response -> {
            assertThat(response.status(), equalTo(RestStatus.OK));
            assertThat(response.getFailedShards(), greaterThan(0));
            assertThat(response.getShardFailures().length, greaterThan(0));
            for (ShardSearchFailure shardFailure : response.getShardFailures()) {
                assertTrue(shardFailure.getCause() instanceof IllegalArgumentException);
                assertEquals("At least one unigram is required but all tokens were ngrams", shardFailure.getCause().getMessage());
            }
        });
    }
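
    // A rough sketch (an assumption about the REST-level shape, not asserted by this test) of how such a per-shard
    // failure surfaces to clients: the search itself returns HTTP 200 and the failure is reported under
    // "_shards.failures", e.g.
    //
    //   "_shards": {
    //     "failed": 1,
    //     "failures": [
    //       { "reason": { "type": "illegal_argument_exception",
    //                     "reason": "At least one unigram is required but all tokens were ngrams" } }
    //     ]
    //   }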

    private static SearchRequestBuilder createSuggesterSearch(String fieldName) {
        PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion(fieldName).text("hello world")
            .addCandidateGenerator(new DirectCandidateGeneratorBuilder("text").suggestMode("always").minWordLength(1).maxEdits(2));

        SearchRequestBuilder searchBuilder = prepareSearch("test").setSize(0)
            .suggest(new SuggestBuilder().addSuggestion("test_suggestion", phraseSuggestion));
        return searchBuilder;
    }
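
    // For reference, the request built above corresponds roughly to the following search body (shown here for the
    // "text.ngrams" case; the exact serialization of defaults may differ):
    //
    //   {
    //     "size": 0,
    //     "suggest": {
    //       "test_suggestion": {
    //         "text": "hello world",
    //         "phrase": {
    //           "field": "text.ngrams",
    //           "direct_generator": [
    //             { "field": "text", "suggest_mode": "always", "min_word_length": 1, "max_edits": 2 }
    //           ]
    //         }
    //       }
    //     }
    //   }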

    /**
     * Demonstrates that the same configuration works fine when using a different field that produces unigrams
     */
    public void testPhraseSuggestionWithUnigramFieldWorks() throws IOException {
        createIndexAndDocs(false);

        // Use the main "text" field instead of "text.ngrams" - this should work fine
        // because the standard analyzer produces unigrams
        SearchRequestBuilder searchRequestBuilder = createSuggesterSearch("text");

        // This should NOT throw an exception
        assertNoFailuresAndResponse(searchRequestBuilder, response -> {
            // Just verify we get a response without exceptions
            assertNotNull(response.getSuggest());
        });
    }

    /**
     * Shows that the same ngram field configuration works when the shingle filter is configured with output_unigrams=true
     */
    public void testPhraseSuggestionWithNgramsAndUnigramsWorks() throws IOException {
        createIndexAndDocs(true);

        // Use the ngrams field again; this time it should work because the analyzer also produces unigrams
        SearchRequestBuilder searchRequestBuilder = createSuggesterSearch("text.ngrams");

        // This should NOT throw an exception because unigrams are available
        assertNoFailuresAndResponse(searchRequestBuilder, response -> assertNotNull(response.getSuggest()));
    }

    private void createIndexAndDocs(boolean outputUnigrams) throws IOException {
        // Create an index with a shingle-based analyzer that either does or does not emit unigrams,
        // depending on the outputUnigrams flag
        assertAcked(
            prepareCreate("test").setSettings(
                Settings.builder()
                    .put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5))
                    .put("index.analysis.analyzer.ngram_only.tokenizer", "standard")
                    .putList("index.analysis.analyzer.ngram_only.filter", "my_shingle", "lowercase")
                    .put("index.analysis.filter.my_shingle.type", "shingle")
                    .put("index.analysis.filter.my_shingle.output_unigrams", outputUnigrams)
                    .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
                    .put("index.analysis.filter.my_shingle.max_shingle_size", 3)
            )
                .setMapping(
                    XContentFactory.jsonBuilder()
                        .startObject()
                        .startObject("_doc")
                        .startObject("properties")
                        .startObject("text")
                        .field("type", "text")
                        .field("analyzer", "standard")
                        .startObject("fields")
                        .startObject("ngrams")
                        .field("type", "text")
                        .field("analyzer", "ngram_only") // Use the ngram-only analyzer for suggestions
                        .endObject()
                        .endObject()
                        .endObject()
                        .endObject()
                        .endObject()
                        .endObject()
                )
        );

        ensureGreen();

        // Index some test documents
        indexDoc("test", "1", "text", "hello world test");
        indexDoc("test", "2", "text", "another test phrase");
        indexDoc("test", "3", "text", "some more content");
        refresh();
    }
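
    // To see why the unigram check trips, consider a (hypothetical, not part of this test) analyze call such as
    // POST test/_analyze { "analyzer": "ngram_only", "text": "hello world" }.
    // With output_unigrams=false the shingle filter emits only the shingle token "hello world" and no unigrams,
    // whereas with output_unigrams=true it also emits the unigrams "hello" and "world".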

}