Skip to content

Commit 9d19ded

Browse files
ubyte, github-actions[bot]
authored and committed
Added the ability to use the GetFullTopicPath function with arbitrary topic names. (#24623)
1 parent aa3401b commit 9d19ded

File tree

4 files changed

+9
-153
lines changed

4 files changed

+9
-153
lines changed

.github/last_commit.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
9c1b4170d6c376675a5384e54d06d3cc74eaabe4
1+
a259bb7ecddc03e6802baf19019a34a051b2559d

src/api/protos/ydb_table.proto

Lines changed: 5 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -113,149 +113,7 @@ message GlobalVectorKMeansTreeIndex {
113113
KMeansTreeSettings vector_settings = 3;
114114
}
115115

116-
message FulltextIndexSettings {
117-
// Specifies the layout strategy for storing and updating the full-text index
118-
enum Layout {
119-
LAYOUT_UNSPECIFIED = 0;
120-
121-
// Uses a single flat inverted index table (indexImplTable)
122-
// Example source table:
123-
// ┌────┬────────────────────────────┐
124-
// │ id │ text │
125-
// ├────┼────────────────────────────┤
126-
// │ 1 │ "The quick brown fox" │
127-
// │ 2 │ "The quick blue hare" │
128-
// └────┴────────────────────────────┘
129-
// Example inverted index table (indexImplTable):
130-
// ┌──────────────┬────┐
131-
// │ __ydb_token │ id │
132-
// ├──────────────┼────┤
133-
// │ "blue" │ 2 │
134-
// │ "brown" │ 1 │
135-
// │ "fox" │ 1 │
136-
// │ "hare" │ 2 │
137-
// │ "quick" │ 1 │
138-
// │ "quick" │ 2 │
139-
// │ "The" │ 1 │
140-
// │ "The" │ 2 │
141-
// └──────────────┴────┘
142-
// Supports a single column only
143-
FLAT = 1;
144-
}
145-
146-
// Specifies how text is tokenized during indexing
147-
enum Tokenizer {
148-
TOKENIZER_UNSPECIFIED = 0;
149-
150-
// Splits text only by whitespace
151-
// Does not split on punctuation
152-
// Example:
153-
// Text: "foo-bar baz_lorem ipsum"
154-
// Tokens: ["foo-bar", "baz_lorem", "ipsum"]
155-
WHITESPACE = 1;
156-
157-
// Applies general language-aware tokenization
158-
// Splits text on whitespace and punctuation
159-
// Example:
160-
// Text: "foo-bar baz_lorem ipsum"
161-
// Tokens: ["foo", "bar", "baz", "lorem", "ipsum"]
162-
STANDARD = 2;
163-
164-
// Treats the entire input as a single token
165-
// No splitting is performed
166-
// Example:
167-
// Text: "Hello World!"
168-
// Tokens: ["Hello World!"]
169-
KEYWORD = 3;
170-
}
171-
172-
// Represents text analyzers settings
173-
message Analyzers {
174-
// See Tokenizer enum
175-
optional Tokenizer tokenizer = 1;
176-
177-
// Language used for language-sensitive operations like stopword filtering
178-
// Example: language = "english"
179-
// By default is not specified and no language-specific logic is applied
180-
optional string language = 2;
181-
182-
// Whether to convert tokens to lowercase
183-
// Example:
184-
// Token: "Quick"
185-
// Output: "quick"
186-
optional bool use_filter_lowercase = 100;
187-
188-
// Whether to remove common stopwords like "the", "a", "is"
189-
// Example: language = "english"
190-
// Tokens: ["the", "quick", "brown"]
191-
// Output: ["quick", "brown"]
192-
optional bool use_filter_stopwords = 110;
193-
194-
// Whether to apply character n-gram indexing to each token
195-
// Must be used with filter_ngram_min_length and filter_ngram_max_length
196-
// Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
197-
// Token: "search"
198-
// Output: ["sea", "ear", "arc", "rch", "sear", "earc", "arch"]
199-
optional bool use_filter_ngram = 120;
200-
201-
// Whether to apply edge n-gram indexing (prefix-based) to each token
202-
// Used with filter_ngram_min_length and filter_ngram_max_length
203-
// Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
204-
// Token: "search"
205-
// Output: ["sea", "sear"]
206-
optional bool use_filter_edge_ngram = 121;
207-
208-
// Minimum length of n-grams to generate (inclusive)
209-
// Must be used with use_filter_ngram or use_filter_edge_ngram
210-
// Default value is 3
211-
optional int32 filter_ngram_min_length = 122 [(Ydb.value) = ">= 0"];
212-
213-
// Maximum length of n-grams to generate (inclusive)
214-
// Must be used with use_filter_ngram or use_filter_edge_ngram
215-
// Default value is 4
216-
optional int32 filter_ngram_max_length = 123 [(Ydb.value) = ">= 0"];
217-
218-
// Whether to filter tokens by their length
219-
// Must be used with filter_length_min or filter_length_max
220-
// Example: filter_length_min = 4, filter_length_max = 6
221-
// Tokens: ["foo", "fooba", "foobar", "foobarbaz"]
222-
// Output: ["fooba", "foobar"]
223-
optional bool use_filter_length = 130;
224-
225-
// Minimum token length to keep (inclusive)
226-
// Must be used with use_filter_length
227-
optional int32 filter_length_min = 131 [(Ydb.value) = ">= 0"];
228-
229-
// Maximum token length to keep (inclusive)
230-
// Must be used with use_filter_length
231-
optional int32 filter_length_max = 132 [(Ydb.value) = ">= 0"];
232-
}
233-
234-
// Represents text analyzers settings for a specific column
235-
message ColumnAnalyzers {
236-
// Name of the column to be indexed
237-
optional string column = 1;
238-
239-
// Analyzer settings specific to this column
240-
Analyzers analyzers = 2;
241-
}
242-
243-
// See Layout enum
244-
optional Layout layout = 1;
245-
246-
// List of columns and their fulltext settings
247-
// Currently, this list should contain a single entry with specified analyzers
248-
// Later, some columns may not use analyzers and will be indexed as-is
249-
// This list must always match TableIndex.index_columns
250-
repeated ColumnAnalyzers columns = 2;
251-
}
252-
253-
message GlobalFulltextIndex {
254-
GlobalIndexSettings settings = 1;
255-
FulltextIndexSettings fulltext_settings = 2;
256-
}
257-
258-
// Represent table index
116+
// Represent secondary index
259117
message TableIndex {
260118
// Name of index
261119
string name = 1;
@@ -267,13 +125,12 @@ message TableIndex {
267125
GlobalAsyncIndex global_async_index = 4;
268126
GlobalUniqueIndex global_unique_index = 6;
269127
GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 7;
270-
GlobalFulltextIndex global_fulltext_index = 8;
271128
}
272129
// list of columns content to be copied in to index table
273130
repeated string data_columns = 5;
274131
}
275132

276-
// Represent table index with index state
133+
// Represent secondary index with index state
277134
message TableIndexDescription {
278135
enum Status {
279136
STATUS_UNSPECIFIED = 0;
@@ -292,7 +149,6 @@ message TableIndexDescription {
292149
GlobalAsyncIndex global_async_index = 5;
293150
GlobalUniqueIndex global_unique_index = 8;
294151
GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 9;
295-
GlobalFulltextIndex global_fulltext_index = 10;
296152
}
297153
Status status = 4;
298154
// list of columns content to be copied in to index table
@@ -792,7 +648,7 @@ message CreateTableRequest {
792648
// Table profile
793649
TableProfile profile = 5;
794650
Ydb.Operations.OperationParams operation_params = 6;
795-
// List of table indexes
651+
// List of secondary indexes
796652
repeated TableIndex indexes = 7;
797653
// Table rows time to live settings
798654
TtlSettings ttl_settings = 8;
@@ -870,9 +726,9 @@ message AlterTableRequest {
870726
TtlSettings set_ttl_settings = 7;
871727
google.protobuf.Empty drop_ttl_settings = 8;
872728
}
873-
// Add table indexes
729+
// Add secondary indexes
874730
repeated TableIndex add_indexes = 9;
875-
// Remove table indexes
731+
// Remove secondary indexes
876732
repeated string drop_indexes = 10;
877733
// Change table storage settings
878734
StorageSettings alter_storage_settings = 11;

src/client/topic/ut/ut_utils/topic_sdk_test_setup.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,8 +151,8 @@ std::string TTopicSdkTestSetup::GetDatabase() const {
151151
return Database_;
152152
}
153153

154-
std::string TTopicSdkTestSetup::GetFullTopicPath() const {
155-
return GetDatabase() + "/" + GetTopicPath();
154+
std::string TTopicSdkTestSetup::GetFullTopicPath(const std::string& name) const {
155+
return GetDatabase() + "/" + GetTopicPath(name);
156156
}
157157

158158
std::vector<std::uint32_t> TTopicSdkTestSetup::GetNodeIds() {

src/client/topic/ut/ut_utils/topic_sdk_test_setup.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ class TTopicSdkTestSetup : public ITopicTestSetup {
5151
std::string GetEndpoint() const override;
5252
std::string GetDatabase() const override;
5353

54-
std::string GetFullTopicPath() const;
54+
std::string GetFullTopicPath(const std::string& name = TEST_TOPIC) const;
5555

5656
std::vector<std::uint32_t> GetNodeIds() override;
5757
std::uint16_t GetPort() const override;

0 commit comments

Comments
 (0)