@@ -33,9 +33,9 @@ namespace ErrorCodes
3333 extern const int LOGICAL_ERROR;
3434}
3535
36- static inline const String GIN_INDEX_ARGUMENT_TOKENIZER = " tokenizer" ;
37- static inline const String GIN_INDEX_ARGUMENT_NGRAM_SIZE = " ngram_size" ;
38- static inline const String GIN_INDEX_ARGUMENT_MAX_ROWS = " max_rows_per_postings_list" ;
36+ static const String ARGUMENT_TOKENIZER = " tokenizer" ;
37+ static const String ARGUMENT_NGRAM_SIZE = " ngram_size" ;
38+ static const String ARGUMENT_MAX_ROWS = " max_rows_per_postings_list" ;
3939
4040MergeTreeIndexGranuleGin::MergeTreeIndexGranuleGin (
4141 const String & index_name_,
@@ -767,6 +767,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexGin::createIndexCondition(const Actions
767767
768768namespace
769769{
770+
770771std::unordered_map<String, Field> convertArgumentsToOptionsMap (const FieldVector & arguments)
771772{
772773 std::unordered_map<String, Field> options;
@@ -801,16 +802,14 @@ std::optional<Type> getOption(const std::unordered_map<String, Field> & options,
801802 }
802803 return std::nullopt ;
803804}
805+
804806}
805807
806808MergeTreeIndexPtr ginIndexCreator (const IndexDescription & index)
807809{
808- const FieldVector & arguments = index.arguments ;
809- auto options = convertArgumentsToOptionsMap (arguments);
810+ std::unordered_map<String, Field> options = convertArgumentsToOptionsMap (index.arguments );
810811
811- String tokenizer = getOption<String>(options, GIN_INDEX_ARGUMENT_TOKENIZER).value ();
812- UInt64 max_rows_per_postings_list
813- = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_MAX_ROWS).value_or (DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
812+ String tokenizer = getOption<String>(options, ARGUMENT_TOKENIZER).value ();
814813
815814 std::unique_ptr<ITokenExtractor> token_extractor;
816815 if (tokenizer == SplitTokenExtractor::getExternalName ())
@@ -819,49 +818,52 @@ MergeTreeIndexPtr ginIndexCreator(const IndexDescription & index)
819818 token_extractor = std::make_unique<NoOpTokenExtractor>();
820819 else if (tokenizer == NgramTokenExtractor::getExternalName ())
821820 {
822- const UInt64 ngram_size = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_NGRAM_SIZE ).value_or (3 );
821+ UInt64 ngram_size = getOption<UInt64>(options, ARGUMENT_NGRAM_SIZE ).value_or (3 );
823822 token_extractor = std::make_unique<NgramTokenExtractor>(ngram_size);
824823 }
824+ else
825+ throw Exception (ErrorCodes::LOGICAL_ERROR, " Tokenizer {} not supported" , tokenizer);
826+
827+ UInt64 max_rows_per_postings_list = getOption<UInt64>(options, ARGUMENT_MAX_ROWS).value_or (DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
825828
826829 GinFilterParameters params (tokenizer, max_rows_per_postings_list);
827830 return std::make_shared<MergeTreeIndexGin>(index, params, std::move (token_extractor));
828831}
829832
830833void ginIndexValidator (const IndexDescription & index, bool /* attach*/ )
831834{
832- const FieldVector& arguments = index.arguments ;
835+ std::unordered_map<String, Field> options = convertArgumentsToOptionsMap ( index.arguments ) ;
833836
834- if (arguments.empty ())
835- throw Exception (
836- ErrorCodes::INCORRECT_QUERY, " GIN index should have at least '{}' argument, but empty arguments are provided" , GIN_INDEX_ARGUMENT_TOKENIZER);
837+ // / Check that tokenizer is present and supported
838+ std::optional<String> tokenizer = getOption<String>(options, ARGUMENT_TOKENIZER);
839+ if (!tokenizer)
840+ throw Exception (ErrorCodes::INCORRECT_QUERY, " GIN index must have an '{}' argument" , ARGUMENT_TOKENIZER);
837841
838- auto options = convertArgumentsToOptionsMap (arguments);
839-
840- std::optional<String> tokenizer = getOption<String>(options, GIN_INDEX_ARGUMENT_TOKENIZER);
841- if (!tokenizer.has_value ())
842- throw Exception (ErrorCodes::INCORRECT_QUERY, " GIN index must include the '{}' argument" , GIN_INDEX_ARGUMENT_TOKENIZER);
843-
844- const bool is_supported_tokenizer = tokenizer.value () == SplitTokenExtractor::getExternalName ()
845- || tokenizer.value () == NoOpTokenExtractor::getExternalName () || tokenizer.value () == NgramTokenExtractor::getExternalName ();
842+ const bool is_supported_tokenizer = (tokenizer.value () == SplitTokenExtractor::getExternalName ()
843+ || tokenizer.value () == NoOpTokenExtractor::getExternalName ()
844+ || tokenizer.value () == NgramTokenExtractor::getExternalName ());
846845 if (!is_supported_tokenizer)
847846 throw Exception (
848847 ErrorCodes::INCORRECT_QUERY,
849848 " GIN index '{}' argument supports only 'default', 'ngram', and 'noop', but got {}" ,
850- GIN_INDEX_ARGUMENT_TOKENIZER ,
849+ ARGUMENT_TOKENIZER ,
851850 tokenizer.value ());
852851
853852 if (tokenizer.value () == NgramTokenExtractor::getExternalName ())
854853 {
855- const UInt64 ngram_size = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_NGRAM_SIZE ).value_or (3 );
854+ UInt64 ngram_size = getOption<UInt64>(options, ARGUMENT_NGRAM_SIZE ).value_or (3 );
856855 if (ngram_size < 2 || ngram_size > 8 )
857856 throw Exception (
858- ErrorCodes::INCORRECT_QUERY, " GIN index '{}' argument must be between 2 and 8, but got {}" , GIN_INDEX_ARGUMENT_NGRAM_SIZE, ngram_size);
857+ ErrorCodes::INCORRECT_QUERY,
858+ " GIN index '{}' argument must be between 2 and 8, but got {}" , ARGUMENT_NGRAM_SIZE, ngram_size);
859859 }
860860
861- UInt64 max_rows_per_postings_list = getOption<UInt64>(options, GIN_INDEX_ARGUMENT_MAX_ROWS).value_or (DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
861+ // / Check that max_rows_per_postings_list is valid (if present)
862+ UInt64 max_rows_per_postings_list = getOption<UInt64>(options, ARGUMENT_MAX_ROWS).value_or (DEFAULT_MAX_ROWS_PER_POSTINGS_LIST);
862863 if (max_rows_per_postings_list < MIN_ROWS_PER_POSTINGS_LIST)
863864 throw Exception (
864- ErrorCodes::INCORRECT_QUERY, " GIN index '{}' should not be less than {}" , GIN_INDEX_ARGUMENT_MAX_ROWS, MIN_ROWS_PER_POSTINGS_LIST);
865+ ErrorCodes::INCORRECT_QUERY,
866+ " GIN index '{}' should not be less than {}" , ARGUMENT_MAX_ROWS, MIN_ROWS_PER_POSTINGS_LIST);
865867
866868 GinFilterParameters gin_filter_params (tokenizer.value (), max_rows_per_postings_list); // / Just validate
867869
0 commit comments