Skip to content

Commit 9268fa5

Browse files
committed
Cosmetics
1 parent 642242a commit 9268fa5

File tree

5 files changed

+142
-110
lines changed

5 files changed

+142
-110
lines changed

src/Interpreters/GinFilter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ GinFilterParameters::GinFilterParameters(String tokenizer_, UInt64 max_rows_per_
2121
, max_rows_per_postings_list(max_rows_per_postings_list_)
2222
{
2323
if (max_rows_per_postings_list == UNLIMITED_ROWS_PER_POSTINGS_LIST)
24-
max_rows_per_postings_list = std::numeric_limits<UInt32>::max();
24+
max_rows_per_postings_list = std::numeric_limits<UInt64>::max();
2525
}
2626

2727
GinFilter::GinFilter(const GinFilterParameters & params_)

src/Storages/IndicesDescription.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ namespace
3030
using ReplaceAliasToExprVisitor = InDepthNodeVisitor<ReplaceAliasByExpressionMatcher, true>;
3131

3232

33-
[[nodiscard]] inline Tuple parseGinIndexArgumentFromAST(const auto & arguments)
33+
Tuple parseGinIndexArgumentFromAST(const auto & arguments)
3434
{
3535
const auto & idenfitier = arguments->children[0]->template as<ASTIdentifier>();
3636
if (idenfitier == nullptr)
@@ -46,7 +46,7 @@ using ReplaceAliasToExprVisitor = InDepthNodeVisitor<ReplaceAliasByExpressionMat
4646
return key_value_pair;
4747
}
4848

49-
[[nodiscard]] bool parseGinIndexArgumentsFromAST(const auto & arguments, FieldVector & parsed_arguments)
49+
bool parseGinIndexArgumentsFromAST(const auto & arguments, FieldVector & parsed_arguments)
5050
{
5151
parsed_arguments.reserve(arguments->children.size());
5252

@@ -168,12 +168,10 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast
168168

169169
if (index_type && index_type->arguments)
170170
{
171-
bool is_gin_function = index_type->name == "gin" || index_type->name == "inverted" || index_type->name == "full_text";
172-
if (is_gin_function && parseGinIndexArgumentsFromAST(index_type->arguments, result.arguments))
173-
{
171+
bool is_gin_index = index_type->name == "gin" || index_type->name == "inverted" || index_type->name == "full_text";
172+
if (is_gin_index && parseGinIndexArgumentsFromAST(index_type->arguments, result.arguments))
174173
return result;
175-
}
176-
// we still support previous GIN index description syntax for the backward compatibility.
174+
177175
for (size_t i = 0; i < index_type->arguments->children.size(); ++i)
178176
{
179177
const auto & child = index_type->arguments->children[i];

src/Storages/MergeTree/MergeTreeIndexGin.cpp

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ namespace ErrorCodes
3333
extern const int LOGICAL_ERROR;
3434
}
3535

36-
static inline const String GIN_INDEX_ARGUMENT_TOKENIZER = "tokenizer";
37-
static inline const String GIN_INDEX_ARGUMENT_NGRAM_SIZE = "ngram_size";
38-
static inline const String GIN_INDEX_ARGUMENT_MAX_ROWS = "max_rows_per_postings_list";
36+
static const String ARGUMENT_TOKENIZER = "tokenizer";
37+
static const String ARGUMENT_NGRAM_SIZE = "ngram_size";
38+
static const String ARGUMENT_MAX_ROWS = "max_rows_per_postings_list";
3939

4040
MergeTreeIndexGranuleGin::MergeTreeIndexGranuleGin(
4141
const String & index_name_,
@@ -767,6 +767,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexGin::createIndexCondition(const Actions
767767

768768
namespace
769769
{
770+
770771
std::unordered_map<String, Field> convertArgumentsToOptionsMap(const FieldVector & arguments)
771772
{
772773
std::unordered_map<String, Field> options;
@@ -801,16 +802,14 @@ std::optional<Type> getOption(const std::unordered_map<String, Field> & options,
801802
}
802803
return std::nullopt;
803804
}
805+
804806
}
805807

806808
MergeTreeIndexPtr ginIndexCreator(const IndexDescription & index)
807809
{
808-
const FieldVector & arguments = index.arguments;
809-
auto options = convertArgumentsToOptionsMap(arguments);
810+
std::unordered_map<String, Field> options = convertArgumentsToOptionsMap(index.arguments);
810811

811-
String tokenizer = getOption<String>(options, GIN_INDEX_ARGUMENT_TOKENIZER).value();
812-
UInt64 max_rows_per_postings_list
813-
= getOption<UInt64>(options, GIN_INDEX_ARGUMENT_MAX_ROWS).value_or(DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
812+
String tokenizer = getOption<String>(options, ARGUMENT_TOKENIZER).value();
814813

815814
std::unique_ptr<ITokenExtractor> token_extractor;
816815
if (tokenizer == SplitTokenExtractor::getExternalName())
@@ -819,49 +818,52 @@ MergeTreeIndexPtr ginIndexCreator(const IndexDescription & index)
819818
token_extractor = std::make_unique<NoOpTokenExtractor>();
820819
else if (tokenizer == NgramTokenExtractor::getExternalName())
821820
{
822-
const UInt64 ngram_size = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_NGRAM_SIZE).value_or(3);
821+
UInt64 ngram_size = getOption<UInt64>(options, ARGUMENT_NGRAM_SIZE).value_or(3);
823822
token_extractor = std::make_unique<NgramTokenExtractor>(ngram_size);
824823
}
824+
else
825+
throw Exception(ErrorCodes::LOGICAL_ERROR, "Tokenizer {} not supported", tokenizer);
826+
827+
UInt64 max_rows_per_postings_list = getOption<UInt64>(options, ARGUMENT_MAX_ROWS).value_or(DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
825828

826829
GinFilterParameters params(tokenizer, max_rows_per_postings_list);
827830
return std::make_shared<MergeTreeIndexGin>(index, params, std::move(token_extractor));
828831
}
829832

830833
void ginIndexValidator(const IndexDescription & index, bool /*attach*/)
831834
{
832-
const FieldVector& arguments = index.arguments;
835+
std::unordered_map<String, Field> options = convertArgumentsToOptionsMap(index.arguments);
833836

834-
if (arguments.empty())
835-
throw Exception(
836-
ErrorCodes::INCORRECT_QUERY, "GIN index should have at least '{}' argument, but empty arguments are provided", GIN_INDEX_ARGUMENT_TOKENIZER);
837+
/// Check that tokenizer is present and supported
838+
std::optional<String> tokenizer = getOption<String>(options, ARGUMENT_TOKENIZER);
839+
if (!tokenizer)
840+
throw Exception(ErrorCodes::INCORRECT_QUERY, "GIN index must have an '{}' argument", ARGUMENT_TOKENIZER);
837841

838-
auto options = convertArgumentsToOptionsMap(arguments);
839-
840-
std::optional<String> tokenizer = getOption<String>(options, GIN_INDEX_ARGUMENT_TOKENIZER);
841-
if (!tokenizer.has_value())
842-
throw Exception(ErrorCodes::INCORRECT_QUERY, "GIN index must include the '{}' argument", GIN_INDEX_ARGUMENT_TOKENIZER);
843-
844-
const bool is_supported_tokenizer = tokenizer.value() == SplitTokenExtractor::getExternalName()
845-
|| tokenizer.value() == NoOpTokenExtractor::getExternalName() || tokenizer.value() == NgramTokenExtractor::getExternalName();
842+
const bool is_supported_tokenizer = (tokenizer.value() == SplitTokenExtractor::getExternalName()
843+
|| tokenizer.value() == NoOpTokenExtractor::getExternalName()
844+
|| tokenizer.value() == NgramTokenExtractor::getExternalName());
846845
if (!is_supported_tokenizer)
847846
throw Exception(
848847
ErrorCodes::INCORRECT_QUERY,
849848
"GIN index '{}' argument supports only 'default', 'ngram', and 'noop', but got {}",
850-
GIN_INDEX_ARGUMENT_TOKENIZER,
849+
ARGUMENT_TOKENIZER,
851850
tokenizer.value());
852851

853852
if (tokenizer.value() == NgramTokenExtractor::getExternalName())
854853
{
855-
const UInt64 ngram_size = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_NGRAM_SIZE).value_or(3);
854+
UInt64 ngram_size = getOption<UInt64>(options, ARGUMENT_NGRAM_SIZE).value_or(3);
856855
if (ngram_size < 2 || ngram_size > 8)
857856
throw Exception(
858-
ErrorCodes::INCORRECT_QUERY, "GIN index '{}' argument must be between 2 and 8, but got {}", GIN_INDEX_ARGUMENT_NGRAM_SIZE, ngram_size);
857+
ErrorCodes::INCORRECT_QUERY,
858+
"GIN index '{}' argument must be between 2 and 8, but got {}", ARGUMENT_NGRAM_SIZE, ngram_size);
859859
}
860860

861-
UInt64 max_rows_per_postings_list = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_MAX_ROWS).value_or(DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
861+
/// Check that max_rows_per_postings_list is valid (if present)
862+
UInt64 max_rows_per_postings_list = getOption<UInt64>(options, ARGUMENT_MAX_ROWS).value_or(DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
862863
if (max_rows_per_postings_list < MIN_ROWS_PER_POSTINGS_LIST)
863864
throw Exception(
864-
ErrorCodes::INCORRECT_QUERY, "GIN index '{}' should not be less than {}", GIN_INDEX_ARGUMENT_MAX_ROWS, MIN_ROWS_PER_POSTINGS_LIST);
865+
ErrorCodes::INCORRECT_QUERY,
866+
"GIN index '{}' should not be less than {}", ARGUMENT_MAX_ROWS, MIN_ROWS_PER_POSTINGS_LIST);
865867

866868
GinFilterParameters gin_filter_params(tokenizer.value(), max_rows_per_postings_list); /// Just validate
867869

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1-
GIN index key-value support.
2-
tokenizer should be default, ngram or noop.
3-
ngram size must be between 2 and 8.
4-
max_rows_per_posting_list should be at least 8192.
5-
shuffled parameters.
6-
incorrect types.
7-
multiple definitions of the same argument.
1+
Must not have no arguments.
2+
Test single tokenizer argument.
3+
-- tokenizer must be default, ngram or noop.
4+
Test ngram_size argument.
5+
-- ngram size must be between 2 and 8.
6+
Test max_rows_per_postings_list argument.
7+
-- max_rows_per_posting_list should be at least 8192.
8+
Parameters are shuffled.
9+
Types are incorrect.
10+
Same argument appears >1 times.
811
Must be created on single column.
912
Must be created on String or FixedString or Array(String) or Array(FixedString) or LowCardinality(String) or LowCardinality(FixedString) columns.

0 commit comments

Comments
 (0)