Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/changelog/113143.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
pr: 113143
summary: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
area: Analysis
type: deprecation
issues: []
deprecation:
title: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
area: Analysis
details: kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed.
impact: These stemmers will be removed and will no longer be supported.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: "no longer be supported"

Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`,
`Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`,
`Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`.

deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version]

For example:

[source,console]
Expand All @@ -28,7 +30,7 @@ PUT /my-index-000001
"filter": {
"my_snow": {
"type": "snowball",
"language": "Lovins"
"language": "Irish"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*]

Dutch::
https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*],
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`]
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version]

English::
https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*],
https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`],
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`],
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version],
https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`],
https://snowballstem.org/algorithms/english/stemmer.html[`porter2`],
{lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
Expand Down Expand Up @@ -81,6 +83,8 @@

public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class);

private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();

private String language;
Expand All @@ -90,6 +94,20 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
create(EMPTY_TOKEN_STREAM).close();
if ("lovins".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"lovins_deprecation",
"The [lovins] stemmer is deprecated and will be removed in a future version."
);
}
if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"dutch_kp_deprecation",
"The [dutch_kp] stemmer is deprecated and will be removed in a future version."
);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.elasticsearch.common.logging.HeaderWarning;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
Expand All @@ -23,16 +25,33 @@
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.index.IndexVersionUtils;
import org.junit.After;
import org.junit.Before;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED;
import static org.hamcrest.Matchers.instanceOf;

public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {

private ThreadContext threadContext;

/**
 * Creates a fresh {@link ThreadContext} before each test and registers it with
 * {@link HeaderWarning}; the tests in this class read the "Warning" response
 * headers back from this context.
 */
@Before
public final void before() {
    final ThreadContext freshContext = new ThreadContext(Settings.EMPTY);
    threadContext = freshContext;
    HeaderWarning.setThreadContext(freshContext);
}

/**
 * Unregisters the per-test {@link ThreadContext} from {@link HeaderWarning} and
 * clears the field so the next test starts from a clean state.
 */
@After
public final void after() {
    final ThreadContext registeredContext = this.threadContext;
    HeaderWarning.removeThreadContext(registeredContext);
    this.threadContext = null;
}

private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin();

public void testEnglishFilterFactory() throws IOException {
Expand Down Expand Up @@ -103,4 +122,32 @@ public void testMultipleLanguagesThrowsException() throws IOException {
);
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
}

/**
 * Verifies that building analysis components with a {@code stemmer} filter whose
 * language is {@code kp} records the {@code dutch_kp} deprecation message in the
 * "Warning" response headers of the test's thread context.
 */
public void testKpDeprecation() throws IOException {
    IndexVersion v = IndexVersionUtils.randomVersion(random());
    Settings settings = Settings.builder()
        .put("index.analysis.filter.my_kp.type", "stemmer")
        .put("index.analysis.filter.my_kp.language", "kp")
        .put(SETTING_VERSION_CREATED, v)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();

    AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
    final List<String> actualWarningStrings = threadContext.getResponseHeaders().get("Warning");
    // The lookup may yield null when no warning header was recorded; fail with a
    // descriptive assertion instead of a NullPointerException on the stream call.
    assertNotNull("expected a Warning response header for the deprecated [kp] stemmer", actualWarningStrings);
    assertTrue(
        "expected the [dutch_kp] deprecation warning but got " + actualWarningStrings,
        actualWarningStrings.stream().anyMatch(warning -> warning.contains("The [dutch_kp] stemmer is deprecated"))
    );
}

/**
 * Verifies that building analysis components with a {@code stemmer} filter whose
 * language is {@code lovins} records the {@code lovins} deprecation message in the
 * "Warning" response headers of the test's thread context.
 */
public void testLovinsDeprecation() throws IOException {
    IndexVersion v = IndexVersionUtils.randomVersion(random());
    Settings settings = Settings.builder()
        .put("index.analysis.filter.my_lovins.type", "stemmer")
        .put("index.analysis.filter.my_lovins.language", "lovins")
        .put(SETTING_VERSION_CREATED, v)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();

    AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
    final List<String> actualWarningStrings = threadContext.getResponseHeaders().get("Warning");
    // The lookup may yield null when no warning header was recorded; fail with a
    // descriptive assertion instead of a NullPointerException on the stream call.
    assertNotNull("expected a Warning response header for the deprecated [lovins] stemmer", actualWarningStrings);
    assertTrue(
        "expected the [lovins] deprecation warning but got " + actualWarningStrings,
        actualWarningStrings.stream().anyMatch(warning -> warning.contains("The [lovins] stemmer is deprecated"))
    );
}
}