@@ -113,149 +113,7 @@ message GlobalVectorKMeansTreeIndex {
113113 KMeansTreeSettings vector_settings = 3 ;
114114}
115115
116- message FulltextIndexSettings { // Settings of a full-text index: a storage layout plus per-column text analyzers
117- // Specifies the layout strategy for storing and updating the full-text index
118- enum Layout {
119- LAYOUT_UNSPECIFIED = 0 ; // Zero value: no layout has been chosen
120-
121- // Uses a single flat inverted index table (indexImplTable)
122- // Example source table:
123- // ┌────┬────────────────────────────┐
124- // │ id │ text │
125- // ├────┼────────────────────────────┤
126- // │ 1 │ "The quick brown fox" │
127- // │ 2 │ "The quick blue hare" │
128- // └────┴────────────────────────────┘
129- // Example inverted index table (indexImplTable):
130- // ┌──────────────┬────┐
131- // │ __ydb_token │ id │
132- // ├──────────────┼────┤
133- // │ "blue" │ 2 │
134- // │ "brown" │ 1 │
135- // │ "fox" │ 1 │
136- // │ "hare" │ 2 │
137- // │ "quick" │ 1 │
138- // │ "quick" │ 2 │
139- // │ "The" │ 1 │
140- // │ "The" │ 2 │
141- // └──────────────┴────┘
142- // Supports a single column only
143- FLAT = 1 ;
144- }
145-
146- // Specifies how text is split into tokens during indexing
147- enum Tokenizer {
148- TOKENIZER_UNSPECIFIED = 0 ; // Zero value: no tokenizer has been chosen
149-
150- // Splits text only by whitespace
151- // Does not split on punctuation
152- // Example:
153- // Text: "foo-bar baz_lorem ipsum"
154- // Tokens: ["foo-bar", "baz_lorem", "ipsum"]
155- WHITESPACE = 1 ;
156-
157- // Applies general language-aware tokenization
158- // Splits text on whitespace and punctuation
159- // Example:
160- // Text: "foo-bar baz_lorem ipsum"
161- // Tokens: ["foo", "bar", "baz", "lorem", "ipsum"]
162- STANDARD = 2 ;
163-
164- // Treats the entire input as a single token
165- // No splitting is performed
166- // Example:
167- // Text: "Hello World!"
168- // Tokens: ["Hello World!"]
169- KEYWORD = 3 ;
170- }
171-
172- // Text analyzer settings: a tokenizer plus optional token filters
173- message Analyzers {
174- // Tokenizer used to split text into tokens; see the Tokenizer enum
175- optional Tokenizer tokenizer = 1 ;
176-
177- // Language used for language-sensitive operations like stopword filtering
178- // Example: language = "english"
179- // Not specified by default, in which case no language-specific logic is applied
180- optional string language = 2 ;
181-
182- // Whether to convert tokens to lowercase
183- // Example:
184- // Token: "Quick"
185- // Output: "quick"
186- optional bool use_filter_lowercase = 100 ;
187-
188- // Whether to remove common stopwords like "the", "a", "is"
189- // Example: language = "english"
190- // Tokens: ["the", "quick", "brown"]
191- // Output: ["quick", "brown"]
192- optional bool use_filter_stopwords = 110 ;
193-
194- // Whether to apply character n-gram indexing to each token
195- // Must be used with filter_ngram_min_length and filter_ngram_max_length
196- // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
197- // Token: "search"
198- // Output: ["sea", "ear", "arc", "rch", "sear", "earc", "arch"]
199- optional bool use_filter_ngram = 120 ;
200-
201- // Whether to apply edge n-gram indexing (prefix-based) to each token
202- // Used with filter_ngram_min_length and filter_ngram_max_length
203- // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
204- // Token: "search"
205- // Output: ["sea", "sear"]
206- optional bool use_filter_edge_ngram = 121 ;
207-
208- // Minimum length of n-grams to generate (inclusive)
209- // Must be used with use_filter_ngram or use_filter_edge_ngram
210- // Default value is 3
211- optional int32 filter_ngram_min_length = 122 [(Ydb.value ) = ">= 0" ];
212-
213- // Maximum length of n-grams to generate (inclusive)
214- // Must be used with use_filter_ngram or use_filter_edge_ngram
215- // Default value is 4
216- optional int32 filter_ngram_max_length = 123 [(Ydb.value ) = ">= 0" ];
217-
218- // Whether to filter tokens by their length
219- // Must be used with filter_length_min or filter_length_max
220- // Example: filter_length_min = 4, filter_length_max = 6
221- // Tokens: ["foo", "fooba", "foobar", "foobarbaz"]
222- // Output: ["fooba", "foobar"]
223- optional bool use_filter_length = 130 ;
224-
225- // Minimum token length to keep (inclusive)
226- // Must be used with use_filter_length
227- optional int32 filter_length_min = 131 [(Ydb.value ) = ">= 0" ];
228-
229- // Maximum token length to keep (inclusive)
230- // Must be used with use_filter_length
231- optional int32 filter_length_max = 132 [(Ydb.value ) = ">= 0" ];
232- }
233-
234- // Text analyzer settings for a specific column
235- message ColumnAnalyzers {
236- // Name of the column to be indexed
237- optional string column = 1 ;
238-
239- // Analyzer settings specific to this column
240- Analyzers analyzers = 2 ;
241- }
242-
243- // Index layout; see the Layout enum
244- optional Layout layout = 1 ;
245-
246- // List of columns and their full-text settings
247- // Currently, this list should contain a single entry with specified analyzers
248- // Later, some columns may not use analyzers and will be indexed as-is
249- // This list must always match TableIndex.index_columns
250- repeated ColumnAnalyzers columns = 2 ;
251- }
252-
253- message GlobalFulltextIndex { // Global full-text index: common global-index settings plus full-text-specific settings
254- GlobalIndexSettings settings = 1 ; // Common global-index settings (GlobalIndexSettings)
255- FulltextIndexSettings fulltext_settings = 2 ; // Full-text settings: layout and per-column analyzers
256- }
257-
258- // Represent table index
116+ // Represents a secondary index
259117message TableIndex {
260118 // Name of index
261119 string name = 1 ;
@@ -267,13 +125,12 @@ message TableIndex {
267125 GlobalAsyncIndex global_async_index = 4 ;
268126 GlobalUniqueIndex global_unique_index = 6 ;
269127 GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 7 ;
270- GlobalFulltextIndex global_fulltext_index = 8 ;
271128 }
272129 // List of columns whose content is copied into the index table
273130 repeated string data_columns = 5 ;
274131}
275132
276- // Represent table index with index state
133+ // Represents a secondary index together with its state
277134message TableIndexDescription {
278135 enum Status {
279136 STATUS_UNSPECIFIED = 0 ;
@@ -292,7 +149,6 @@ message TableIndexDescription {
292149 GlobalAsyncIndex global_async_index = 5 ;
293150 GlobalUniqueIndex global_unique_index = 8 ;
294151 GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 9 ;
295- GlobalFulltextIndex global_fulltext_index = 10 ;
296152 }
297153 Status status = 4 ;
298154 // List of columns whose content is copied into the index table
@@ -792,7 +648,7 @@ message CreateTableRequest {
792648 // Table profile
793649 TableProfile profile = 5 ;
794650 Ydb.Operations.OperationParams operation_params = 6 ;
795- // List of table indexes
651+ // List of secondary indexes
796652 repeated TableIndex indexes = 7 ;
797653 // Table rows time to live settings
798654 TtlSettings ttl_settings = 8 ;
@@ -870,9 +726,9 @@ message AlterTableRequest {
870726 TtlSettings set_ttl_settings = 7 ;
871727 google.protobuf.Empty drop_ttl_settings = 8 ;
872728 }
873- // Add table indexes
729+ // Add secondary indexes
874730 repeated TableIndex add_indexes = 9 ;
875- // Remove table indexes
731+ // Remove secondary indexes
876732 repeated string drop_indexes = 10 ;
877733 // Change table storage settings
878734 StorageSettings alter_storage_settings = 11 ;
0 commit comments