@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.en.PorterStemFilter
2323import org .apache .lucene .analysis .es .SpanishLightStemFilter
2424import org .apache .lucene .analysis .fr .FrenchLightStemFilter
2525import org .apache .lucene .analysis .it .ItalianLightStemFilter
26+ import org .apache .lucene .analysis .de .GermanLightStemFilter
2627import org .apache .lucene .analysis .LowerCaseFilter
2728import org .apache .lucene .analysis .Analyzer
2829import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter
@@ -104,6 +105,8 @@ object AnalyzerBuilder {
104105 new EnglishAnalyzerBuilder (Config .empty, false )
/** Creates a French analyzer builder seeded with `Config.empty`; the second constructor
  * argument is passed as `false`.
  */
def french: FrenchAnalyzerBuilder = new FrenchAnalyzerBuilder(Config.empty, false)
/** Creates a German analyzer builder seeded with `Config.empty` and with the stemmer flag
  * switched off.
  */
def german: GermanAnalyzerBuilder =
  new GermanAnalyzerBuilder(Config.empty, stemmer = false)
/** Creates an Italian analyzer builder seeded with `Config.empty`; the second constructor
  * argument (presumably the stemmer flag -- confirm against `ItalianAnalyzerBuilder`) is `false`.
  */
def italian: ItalianAnalyzerBuilder = new ItalianAnalyzerBuilder(Config.empty, false)
109112 def spanish : SpanishAnalyzerBuilder =
@@ -233,3 +236,29 @@ final class ItalianAnalyzerBuilder private[lucene] (
233236 def build [F [_]](implicit F : Sync [F ]): Resource [F , Analyzer ] =
234237 mkFromStandardTokenizer(config)(ts => if (self.stemmer) new ItalianLightStemFilter (ts) else ts)
235238}
239+
/** Builder for a Lucene `Analyzer` intended for German text.
  *
  * Every `with...` method defined here returns a new `GermanAnalyzerBuilder`; the receiver is
  * left as it was.
  *
  * @param config  analyzer settings handed to the `AnalyzerBuilder` base class and to `build`
  * @param stemmer when `true`, `build` wraps the token stream in a `GermanLightStemFilter`
  */
final class GermanAnalyzerBuilder private[lucene] (
    config: Config,
    stemmer: Boolean,
) extends AnalyzerBuilder(config) { self =>
  type Builder = GermanAnalyzerBuilder

  /** Creates a fresh builder with `newConfig`, keeping the current stemmer flag unless
    * another value is supplied.
    */
  private def copy(
      newConfig: Config,
      stemmer: Boolean = self.stemmer,
  ): GermanAnalyzerBuilder =
    new GermanAnalyzerBuilder(newConfig, stemmer)

  /** Returns a builder that uses `newConfig` and keeps the current stemmer setting. */
  def withConfig(newConfig: Config): GermanAnalyzerBuilder =
    copy(newConfig = newConfig)

  /** Enables the Lucene `GermanLightStemFilter` and switches lowercasing on.
    *
    * Light stemming strips common German inflectional endings so that related word forms
    * share one stem; for example `häuser` and `haus` are both expected to end up as `haus`.
    *
    * NOTE: Lowercasing is forced as it is required for the Lucene GermanLightStemFilter.
    */
  def withGermanLightStemmer: GermanAnalyzerBuilder =
    copy(newConfig = config.copy(lowerCase = true), stemmer = true)

  /** Builds the analyzer as a `Resource` via `mkFromStandardTokenizer`, using the current
    * config. The token stream passed to the callback is wrapped in a `GermanLightStemFilter`
    * only when the stemmer flag is set; otherwise it is returned unchanged.
    */
  def build[F[_]](implicit F: Sync[F]): Resource[F, Analyzer] =
    mkFromStandardTokenizer(config)(ts => if (self.stemmer) new GermanLightStemFilter(ts) else ts)
}
0 commit comments