@@ -139,6 +139,7 @@ message FulltextIndexSettings {
139139 // │ "The" │ 1 │
140140 // │ "The" │ 2 │
141141 // └──────────────┴────┘
142+ // Supports a single column only
142143 FLAT = 1 ;
143144 }
144145
@@ -168,67 +169,84 @@ message FulltextIndexSettings {
168169 KEYWORD = 3 ;
169170 }
170171
172+ // Represents text analyzers settings
173+ message Analyzers {
174+ // See Tokenizer enum
175+ Tokenizer tokenizer = 1 ;
176+
177+ // Language used for language-sensitive operations like stopword filtering
178+ // Example: language = "english"
179+        // By default it is not specified, and no language-specific logic is applied
180+ string language = 2 ;
181+
182+ // Whether to convert tokens to lowercase
183+ // Example:
184+ // Token: "Quick"
185+ // Output: "quick"
186+ bool use_filter_lowercase = 100 ;
187+
188+ // Whether to remove common stopwords like "the", "a", "is"
189+ // Example: language = "english"
190+ // Tokens: ["the", "quick", "brown"]
191+ // Output: ["quick", "brown"]
192+ bool use_filter_stopwords = 110 ;
193+
194+ // Whether to apply character n-gram indexing to each token
195+ // Must be used with filter_ngram_min_length and filter_ngram_max_length
196+ // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
197+ // Token: "search"
198+ // Output: ["sea", "ear", "arc", "rch", "sear", "earc", "arch"]
199+ bool use_filter_ngram = 120 ;
200+
201+ // Whether to apply edge n-gram indexing (prefix-based) to each token
202+ // Used with filter_ngram_min_length and filter_ngram_max_length
203+ // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
204+ // Token: "search"
205+ // Output: ["sea", "sear"]
206+ bool use_filter_edge_ngram = 121 ;
207+
208+ // Minimum length of n-grams to generate (inclusive)
209+ // Must be used with use_filter_ngram or use_filter_edge_ngram
210+ // Default value is 3
211+ int32 filter_ngram_min_length = 122 [(Ydb.value ) = ">= 0" ];
212+
213+ // Maximum length of n-grams to generate (inclusive)
214+ // Must be used with use_filter_ngram or use_filter_edge_ngram
215+ // Default value is 4
216+ int32 filter_ngram_max_length = 123 [(Ydb.value ) = ">= 0" ];
217+
218+ // Whether to filter tokens by their length
219+ // Must be used with filter_length_min or filter_length_max
220+ // Example: filter_length_min = 4, filter_length_max = 6
221+ // Tokens: ["foo", "fooba", "foobar", "foobarbaz"]
222+ // Output: ["fooba", "foobar"]
223+ bool use_filter_length = 130 ;
224+
225+ // Minimum token length to keep (inclusive)
226+ // Must be used with use_filter_length
227+ int32 filter_length_min = 131 [(Ydb.value ) = ">= 0" ];
228+
229+ // Maximum token length to keep (inclusive)
230+ // Must be used with use_filter_length
231+ int32 filter_length_max = 132 [(Ydb.value ) = ">= 0" ];
232+ }
233+
234+ // Represents text analyzers settings for a specific column
235+ message ColumnAnalyzers {
236+ // Name of the column to be indexed
237+ string column = 1 ;
238+
239+ // Analyzer settings specific to this column
240+ Analyzers analyzers = 2 ;
241+ }
242+
171243 // See Layout enum
172244 Layout layout = 1 ;
173245
174- // See Tokenizer enum
175- Tokenizer tokenizer = 2 ;
176-
177- // Language used for language-sensitive operations like stopword filtering
178- // Example: language = "english"
179- // By default is not specified and no language-specific logic is applied
180- string language = 3 ;
181-
182- // Whether to convert tokens to lowercase
183- // Example:
184- // Token: "Quick"
185- // Output: "quick"
186- bool use_filter_lowercase = 100 ;
187-
188- // Whether to remove common stopwords like "the", "a", "is"
189- // Example: language = "english"
190- // Tokens: ["the", "quick", "brown"]
191- // Output: ["quick", "brown"]
192- bool use_filter_stopwords = 110 ;
193-
194- // Whether to apply character n-gram indexing to each token
195- // Must be used with filter_ngram_min_length and filter_ngram_max_length
196- // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
197- // Token: "search"
198- // Output: ["sea", "ear", "arc", "rch", "sear", "earc", "arch"]
199- bool use_filter_ngram = 120 ;
200-
201- // Whether to apply edge n-gram indexing (prefix-based) to each token
202- // Used with filter_ngram_min_length and filter_ngram_max_length
203- // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
204- // Token: "search"
205- // Output: ["sea", "sear"]
206- bool use_filter_edge_ngram = 121 ;
207-
208- // Minimum length of n-grams to generate (inclusive)
209- // Must be used with use_filter_ngram or use_filter_edge_ngram
210- // Default value is 3
211- int32 filter_ngram_min_length = 122 [(Ydb.value ) = ">= 0" ];
212-
213- // Maximum length of n-grams to generate (inclusive)
214- // Must be used with use_filter_ngram or use_filter_edge_ngram
215- // Default value is 4
216- int32 filter_ngram_max_length = 123 [(Ydb.value ) = ">= 0" ];
217-
218- // Whether to filter tokens by their length
219- // Must be used with filter_length_min or filter_length_max
220- // Example: filter_length_min = 4, filter_length_max = 6
221- // Tokens: ["foo", "fooba", "foobar", "foobarbaz"]
222- // Output: ["fooba", "foobar"]
223- bool use_filter_length = 130 ;
224-
225- // Minimum token length to keep (inclusive)
226- // Must be used with use_filter_length
227- int32 filter_length_min = 131 [(Ydb.value ) = ">= 0" ];
228-
229- // Maximum token length to keep (inclusive)
230- // Must be used with use_filter_length
231- int32 filter_length_max = 132 [(Ydb.value ) = ">= 0" ];
246+ // List of columns and their fulltext settings
247+ // Currently, this list should contain a single entry
248+    // The provided column must be the only one in the TableIndex.index_columns list
249+ repeated ColumnAnalyzers columns = 2 ;
232250}
233251
234252message GlobalFulltextIndex {
0 commit comments