Skip to content

Commit e016823

Browse files
authored
Merge pull request #50 from valencik/add-toSet
Add helper function for `defaultStopWords`
2 parents bd24dae + 78b6978 commit e016823

File tree

1 file changed

+17
-51
lines changed

1 file changed

+17
-51
lines changed

lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala

Lines changed: 17 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,15 @@ object Config {
7272
sealed abstract class AnalyzerBuilder private[lucene] (config: Config) {
7373
type Builder <: AnalyzerBuilder
7474

75+
private[lucene] def toSet(cs: CharArraySet): Set[String] =
76+
cs.asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
77+
78+
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
79+
 * This set is immutable and is not used internally; the underlying Lucene `CharArraySet` is what
80+
 * is actually used to build the default StopFilter.
81+
 */
7582
def defaultStopWords: Set[String]
83+
7684
def withConfig(config: Config): Builder
7785

7886
/** Adds a lowercasing stage to the analyzer pipeline */
@@ -146,7 +154,7 @@ final class DefaultAnalyzerBuilder private[lucene] (config: Config)
146154
extends AnalyzerBuilder(config) { self =>
147155
type Builder = DefaultAnalyzerBuilder
148156

149-
lazy val defaultStopWords: Set[String] = Set.empty
157+
val defaultStopWords: Set[String] = Set.empty
150158

151159
def withConfig(newConfig: Config): DefaultAnalyzerBuilder =
152160
new DefaultAnalyzerBuilder(newConfig)
@@ -194,12 +202,7 @@ final class EnglishAnalyzerBuilder private[lucene] (
194202
def withConfig(newConfig: Config): EnglishAnalyzerBuilder =
195203
copy(newConfig = newConfig)
196204

197-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
198-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
199-
* build the default StopFilter
200-
*/
201-
lazy val defaultStopWords: Set[String] =
202-
getEnglishStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
205+
lazy val defaultStopWords: Set[String] = toSet(getEnglishStopSet())
203206

204207
/** Adds the Porter Stemmer to the end of the analyzer pipeline and enables lowercasing.
205208
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -231,12 +234,7 @@ final class FrenchAnalyzerBuilder private[lucene] (
231234
def withConfig(newConfig: Config): FrenchAnalyzerBuilder =
232235
copy(newConfig = newConfig)
233236

234-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
235-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
236-
* build the default StopFilter
237-
*/
238-
lazy val defaultStopWords: Set[String] =
239-
getFrenchStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
237+
lazy val defaultStopWords: Set[String] = toSet(getFrenchStopSet())
240238

241239
/** Adds the FrenchLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
242240
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -267,12 +265,7 @@ final class SpanishAnalyzerBuilder private[lucene] (
267265
def withConfig(newConfig: Config): SpanishAnalyzerBuilder =
268266
copy(newConfig = newConfig)
269267

270-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
271-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
272-
* build the default StopFilter
273-
*/
274-
lazy val defaultStopWords: Set[String] =
275-
getSpanishStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
268+
lazy val defaultStopWords: Set[String] = toSet(getSpanishStopSet())
276269

277270
/** Adds the SpanishLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
278271
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -303,12 +296,7 @@ final class ItalianAnalyzerBuilder private[lucene] (
303296
def withConfig(newConfig: Config): ItalianAnalyzerBuilder =
304297
copy(newConfig = newConfig)
305298

306-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
307-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
308-
* build the default StopFilter
309-
*/
310-
lazy val defaultStopWords: Set[String] =
311-
getItalianStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
299+
lazy val defaultStopWords: Set[String] = toSet(getItalianStopSet())
312300

313301
/** Adds the ItalianLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
314302
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -339,12 +327,7 @@ final class GermanAnalyzerBuilder private[lucene] (
339327
def withConfig(newConfig: Config): GermanAnalyzerBuilder =
340328
copy(newConfig = newConfig)
341329

342-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
343-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
344-
* build the default StopFilter
345-
*/
346-
lazy val defaultStopWords: Set[String] =
347-
getGermanStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
330+
lazy val defaultStopWords: Set[String] = toSet(getGermanStopSet())
348331

349332
/** Adds the GermanLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
350333
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -375,12 +358,7 @@ final class DutchAnalyzerBuilder private[lucene] (
375358
def withConfig(newConfig: Config): DutchAnalyzerBuilder =
376359
copy(newConfig = newConfig)
377360

378-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
379-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
380-
* build the default StopFilter
381-
*/
382-
lazy val defaultStopWords: Set[String] =
383-
getDutchStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
361+
lazy val defaultStopWords: Set[String] = toSet(getDutchStopSet())
384362

385363
/** Adds the Dutch Snowball Stemmer to the end of the analyzer pipeline and enables lowercasing.
386364
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -413,12 +391,7 @@ final class PortugueseAnalyzerBuilder private[lucene] (
413391
def withConfig(newConfig: Config): PortugueseAnalyzerBuilder =
414392
copy(newConfig = newConfig)
415393

416-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
417-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
418-
* build the default StopFilter
419-
*/
420-
lazy val defaultStopWords: Set[String] =
421-
getPortugueseStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
394+
lazy val defaultStopWords: Set[String] = toSet(getPortugueseStopSet())
422395

423396
/** Adds the PortugueseLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
424397
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -450,14 +423,7 @@ final class BrazilianPortugueseAnalyzerBuilder private[lucene] (
450423
def withConfig(newConfig: Config): BrazilianPortugueseAnalyzerBuilder =
451424
copy(newConfig = newConfig)
452425

453-
/** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
454-
* This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
455-
* build the default StopFilter
456-
*/
457-
lazy val defaultStopWords: Set[String] =
458-
getBrazilianPortugueseStopSet().asScala
459-
.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]]))
460-
.toSet
426+
lazy val defaultStopWords: Set[String] = toSet(getBrazilianPortugueseStopSet())
461427

462428
/** Adds the Brazilian Stemmer to the end of the analyzer pipeline and enables lowercasing.
463429
* Stemming reduces words like `jumping` and `jumps` to their root word `jump`.

0 commit comments

Comments
 (0)