Skip to content

Commit 6902296

Browse files
Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 (#113143) (#113815)
Lucene 10 has upgraded its Snowball stemming support. As part of those upgrades, two stemmers that are no longer supported were removed: `KpStemmer` and `LovinsStemmer`. These correspond to the `dutch_kp` and `lovins` stemmers, respectively. We will deprecate them in 8.16 and will remove support for them in a future version. Co-authored-by: Elastic Machine <[email protected]>
1 parent 8a8ad1b commit 6902296

File tree

5 files changed

+59
-4
lines changed

5 files changed

+59
-4
lines changed

docs/changelog/113143.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pr: 113143
2+
summary: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
3+
area: Analysis
4+
type: deprecation
5+
issues: []
6+
deprecation:
7+
title: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
8+
area: Analysis
9+
details: The kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed in a future version.
10+
impact: These stemmers will be removed and will no longer be supported.

docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`,
1111
`Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`,
1212
`Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`.
1313

14+
deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version]
15+
1416
For example:
1517

1618
[source,console]
@@ -28,7 +30,7 @@ PUT /my-index-000001
2830
"filter": {
2931
"my_snow": {
3032
"type": "snowball",
31-
"language": "Lovins"
33+
"language": "English"
3234
}
3335
}
3436
}

docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,12 +144,12 @@ https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*]
144144
145145
Dutch::
146146
https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*],
147-
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`]
147+
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version]
148148
149149
English::
150150
https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*],
151151
https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`],
152-
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`],
152+
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version],
153153
https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`],
154154
https://snowballstem.org/algorithms/english/stemmer.html[`porter2`],
155155
{lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`]

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
import org.apache.lucene.analysis.snowball.SnowballFilter;
4848
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
4949
import org.elasticsearch.common.Strings;
50+
import org.elasticsearch.common.logging.DeprecationCategory;
51+
import org.elasticsearch.common.logging.DeprecationLogger;
5052
import org.elasticsearch.common.settings.Settings;
5153
import org.elasticsearch.env.Environment;
5254
import org.elasticsearch.index.IndexSettings;
@@ -81,6 +83,8 @@
8183

8284
public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
8385

86+
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class);
87+
8488
private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();
8589

8690
private String language;
@@ -90,6 +94,20 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
9094
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
9195
// check that we have a valid language by trying to create a TokenStream
9296
create(EMPTY_TOKEN_STREAM).close();
97+
if ("lovins".equalsIgnoreCase(language)) {
98+
deprecationLogger.critical(
99+
DeprecationCategory.ANALYSIS,
100+
"lovins_deprecation",
101+
"The [lovins] stemmer is deprecated and will be removed in a future version."
102+
);
103+
}
104+
if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
105+
deprecationLogger.critical(
106+
DeprecationCategory.ANALYSIS,
107+
"dutch_kp_deprecation",
108+
"The [dutch_kp] stemmer is deprecated and will be removed in a future version."
109+
);
110+
}
93111
}
94112

95113
@Override

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import static org.hamcrest.Matchers.instanceOf;
3333

3434
public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
35-
3635
private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin();
3736

3837
public void testEnglishFilterFactory() throws IOException {
@@ -103,4 +102,30 @@ public void testMultipleLanguagesThrowsException() throws IOException {
103102
);
104103
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
105104
}
105+
106+
public void testKpDeprecation() throws IOException {
107+
IndexVersion v = IndexVersionUtils.randomVersion(random());
108+
Settings settings = Settings.builder()
109+
.put("index.analysis.filter.my_kp.type", "stemmer")
110+
.put("index.analysis.filter.my_kp.language", "kp")
111+
.put(SETTING_VERSION_CREATED, v)
112+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
113+
.build();
114+
115+
AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
116+
assertCriticalWarnings("The [dutch_kp] stemmer is deprecated and will be removed in a future version.");
117+
}
118+
119+
public void testLovinsDeprecation() throws IOException {
120+
IndexVersion v = IndexVersionUtils.randomVersion(random());
121+
Settings settings = Settings.builder()
122+
.put("index.analysis.filter.my_lovins.type", "stemmer")
123+
.put("index.analysis.filter.my_lovins.language", "lovins")
124+
.put(SETTING_VERSION_CREATED, v)
125+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
126+
.build();
127+
128+
AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
129+
assertCriticalWarnings("The [lovins] stemmer is deprecated and will be removed in a future version.");
130+
}
106131
}

0 commit comments

Comments
 (0)