You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: specification/_types/analysis/StopWords.ts
+40-1Lines changed: 40 additions & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -17,10 +17,49 @@
17
17
* under the License.
18
18
*/
19
19
20
+
exportenumStopWord{
21
+
_arabic_,
22
+
_armenian_,
23
+
_basque_,
24
+
_bengali_,
25
+
_brazilian_,
26
+
_bulgarian_,
27
+
_catalan_,
28
+
_cjk_,
29
+
_czech_,
30
+
_danish_,
31
+
_dutch_,
32
+
_english_,
33
+
_estonian_,
34
+
_finnish_,
35
+
_french_,
36
+
_galician_,
37
+
_german_,
38
+
_greek_,
39
+
_hindi_,
40
+
_hungarian_,
41
+
_indonesian_,
42
+
_irish_,
43
+
_italian_,
44
+
_latvian_,
45
+
_lithuanian_,
46
+
_norwegian_,
47
+
_persian_,
48
+
_portuguese_,
49
+
_romanian_,
50
+
_russian_,
51
+
_serbian_,
52
+
_sorani_,
53
+
_spanish_,
54
+
_swedish_,
55
+
_thai_,
56
+
_turkish_
57
+
}
58
+
20
59
/**
21
60
* Language value, such as _arabic_ or _thai_. Defaults to _english_.
22
61
* Each language value corresponds to a predefined list of stop words in Lucene. See Stop words by language for supported language values and their stop words.
/** A list of tokens. The filter generates bigrams for these tokens.
222
+
* Either this or the `common_words_path` parameter is required. */
218
223
common_words?: string[]
224
+
/** Path to a file containing a list of tokens. The filter generates bigrams for these tokens.
225
+
* This path must be absolute or relative to the `config` location. The file must be UTF-8 encoded. Each token in the file must be separated by a line break.
226
+
* Either this or the `common_words` parameter is required. */
219
227
common_words_path?: string
228
+
/** If `true`, matches for common words matching are case-insensitive. Defaults to `false`. */
220
229
ignore_case?: boolean
230
+
/** If `true`, the filter excludes the following tokens from the output:
231
+
* - Unigrams for common words
232
+
* - Unigrams for terms followed by common words
233
+
* Defaults to `false`. We recommend enabling this parameter for search analyzers. */
/** Array of token filters. If a token matches the predicate script in the `script` parameter, these filters are applied to the token in the order provided. */
226
240
filter: string[]
241
+
/** Predicate script used to apply token filters. If a token matches this script, the filters in the `filter` parameter are applied to the token. */
* To be removed, the elision must be at the beginning of a token and be immediately followed by an apostrophe. Both the elision and apostrophe are removed.
249
+
* For custom `elision` filters, either this parameter or `articles_path` must be specified. */
232
250
articles?: string[]
251
+
/** Path to a file that contains a list of elisions to remove.
252
+
* This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each elision in the file must be separated by a line break.
253
+
* To be removed, the elision must be at the beginning of a token and be immediately followed by an apostrophe. Both the elision and apostrophe are removed.
254
+
* For custom `elision` filters, either this parameter or `articles` must be specified. */
233
255
articles_path?: string
256
+
/** If `true`, elision matching is case insensitive. If `false`, elision matching is case sensitive. Defaults to `false`. */
/** Maximum character length, including whitespace, of the output token. Defaults to `255`. Concatenated tokens longer than this will result in no token output. */
239
263
max_output_size?: integer
264
+
/** Character to use to concatenate the token stream input. Defaults to a space. */
/** If `true`, duplicate tokens are removed from the filter’s output. Defaults to `true`. */
245
271
dedup?: boolean
272
+
/** One or more `.dic` files (e.g, `en_US.dic`, my_custom.dic) to use for the Hunspell dictionary.
273
+
* By default, the `hunspell` filter uses all `.dic` files in the `<$ES_PATH_CONF>/hunspell/<locale>` directory specified using the `lang`, `language`, or `locale` parameter. */
246
274
dictionary?: string
275
+
/** Locale directory used to specify the `.aff` and `.dic` files for a Hunspell dictionary.
276
+
* @aliases lang, language */
247
277
locale: string
278
+
/** If `true`, only the longest stemmed version of each token is included in the output. If `false`, all stemmed versions of the token are included. Defaults to `false`. */
/** List of words to keep. Only tokens that match words in this list are included in the output.
298
+
* Either this parameter or `keep_words_path` must be specified. */
269
299
keep_words?: string[]
300
+
/** If `true`, lowercase all keep words. Defaults to `false`. */
270
301
keep_words_case?: boolean
302
+
/** Path to a file that contains a list of words to keep. Only tokens that match words in this list are included in the output.
303
+
* This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each word in the file must be separated by a line break.
304
+
* Either this parameter or `keep_words` must be specified. */
/** If `true`, matching for the `keywords` and `keywords_path` parameters ignores letter case. Defaults to `false`. */
276
311
ignore_case?: boolean
312
+
/** Array of keywords. Tokens that match these keywords are not stemmed.
313
+
* This parameter, `keywords_path`, or `keywords_pattern` must be specified. You cannot specify this parameter and `keywords_pattern`. */
277
314
keywords?: string|string[]
315
+
/** Path to a file that contains a list of keywords. Tokens that match these keywords are not stemmed.
316
+
* This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each word in the file must be separated by a line break.
317
+
* This parameter, `keywords`, or `keywords_pattern` must be specified. You cannot specify this parameter and `keywords_pattern`. */
278
318
keywords_path?: string
319
+
/** Java regular expression used to match tokens. Tokens that match this expression are marked as keywords and not stemmed.
320
+
* This parameter, `keywords`, or `keywords_path` must be specified. You cannot specify this parameter and `keywords` or `keywords_pattern`. */
279
321
keywords_pattern?: string
280
322
}
281
323
@@ -285,50 +327,65 @@ export class KStemTokenFilter extends TokenFilterBase {
/** Maximum character length of a token. Longer tokens are excluded from the output. Defaults to `Integer.MAX_VALUE`, which is `2^31-1` or `2147483647`. */
288
331
max?: integer
332
+
/** Minimum character length of a token. Shorter tokens are excluded from the output. Defaults to `0`. */
/** If `true`, all substrings matching the pattern parameter’s regular expression are replaced. If `false`, the filter replaces only the first matching substring in each token. Defaults to `true`. */
329
385
all?: boolean
330
-
flags?: string
386
+
/** Regular expression, written in Java’s regular expression syntax. The filter replaces token substrings matching this pattern with the substring in the `replacement` parameter. */
331
387
pattern: string
388
+
/** Replacement substring. Defaults to an empty substring (`""`). */
332
389
replacement?: string
333
390
}
334
391
@@ -338,6 +395,7 @@ export class PorterStemTokenFilter extends TokenFilterBase {
0 commit comments