|
17 | 17 |
|
18 | 18 | #include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h" |
19 | 19 |
|
| 20 | +#include <glog/logging.h> |
| 21 | + |
20 | 22 | #include "CLucene.h" |
21 | 23 | #include "CLucene/analysis/LanguageBasedAnalyzer.h" |
22 | 24 |
|
|
42 | 44 | namespace doris::segment_v2::inverted_index { |
43 | 45 | #include "common/compile_check_begin.h" |
44 | 46 |
|
45 | | -ReaderPtr InvertedIndexAnalyzer::create_reader(CharFilterMap& char_filter_map) { |
| 47 | +ReaderPtr InvertedIndexAnalyzer::create_reader(const CharFilterMap& char_filter_map) { |
46 | 48 | ReaderPtr reader = std::make_shared<lucene::util::SStringReader<char>>(); |
47 | 49 | if (!char_filter_map.empty()) { |
48 | | - if (char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] == |
49 | | - INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE) { |
50 | | - reader = std::make_shared<CharReplaceCharFilter>( |
51 | | - reader, char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], |
52 | | - char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT]); |
| 50 | + auto it_type = char_filter_map.find(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE); |
| 51 | + if (it_type != char_filter_map.end() && |
| 52 | + it_type->second == INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE) { |
| 53 | + auto it_pattern = char_filter_map.find(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN); |
| 54 | + auto it_replacement = |
| 55 | + char_filter_map.find(INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT); |
| 56 | + if (it_pattern != char_filter_map.end() && it_replacement != char_filter_map.end()) { |
| 57 | + reader = std::make_shared<CharReplaceCharFilter>(reader, it_pattern->second, |
| 58 | + it_replacement->second); |
| 59 | + } |
53 | 60 | } |
54 | 61 | } |
55 | 62 | return reader; |
@@ -123,21 +130,19 @@ AnalyzerPtr InvertedIndexAnalyzer::create_builtin_analyzer(InvertedIndexParserTy |
123 | 130 | return analyzer; |
124 | 131 | } |
125 | 132 |
|
126 | | -std::shared_ptr<lucene::analysis::Analyzer> InvertedIndexAnalyzer::create_analyzer( |
127 | | - const InvertedIndexCtx* inverted_index_ctx) { |
128 | | - const std::string& analyzer_name = inverted_index_ctx->analyzer_name; |
| 133 | +AnalyzerPtr InvertedIndexAnalyzer::create_analyzer(const InvertedIndexAnalyzerConfig* config) { |
| 134 | + DCHECK(config != nullptr); |
| 135 | + const std::string& analyzer_name = config->analyzer_name; |
129 | 136 | if (analyzer_name.empty()) { |
130 | | - return create_builtin_analyzer( |
131 | | - inverted_index_ctx->parser_type, inverted_index_ctx->parser_mode, |
132 | | - inverted_index_ctx->lower_case, inverted_index_ctx->stop_words); |
| 137 | + return create_builtin_analyzer(config->parser_type, config->parser_mode, config->lower_case, |
| 138 | + config->stop_words); |
133 | 139 | } |
134 | 140 |
|
135 | 141 | if (is_builtin_analyzer(analyzer_name)) { |
136 | 142 | InvertedIndexParserType parser_type = |
137 | 143 | get_inverted_index_parser_type_from_string(analyzer_name); |
138 | | - return create_builtin_analyzer(parser_type, inverted_index_ctx->parser_mode, |
139 | | - inverted_index_ctx->lower_case, |
140 | | - inverted_index_ctx->stop_words); |
| 144 | + return create_builtin_analyzer(parser_type, config->parser_mode, config->lower_case, |
| 145 | + config->stop_words); |
141 | 146 | } |
142 | 147 |
|
143 | 148 | auto* index_policy_mgr = doris::ExecEnv::GetInstance()->index_policy_mgr(); |
@@ -176,18 +181,16 @@ std::vector<TermInfo> InvertedIndexAnalyzer::get_analyse_result( |
176 | 181 |
|
177 | 182 | std::vector<TermInfo> InvertedIndexAnalyzer::get_analyse_result( |
178 | 183 | const std::string& search_str, const std::map<std::string, std::string>& properties) { |
179 | | - InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>( |
180 | | - get_analyzer_name_from_properties(properties), |
181 | | - get_inverted_index_parser_type_from_string( |
182 | | - get_parser_string_from_properties(properties)), |
183 | | - get_parser_mode_string_from_properties(properties), |
184 | | - get_parser_phrase_support_string_from_properties(properties), |
185 | | - get_parser_char_filter_map_from_properties(properties), |
186 | | - get_parser_lowercase_from_properties(properties), |
187 | | - get_parser_stopwords_from_properties(properties)); |
188 | | - auto analyzer = create_analyzer(inverted_index_ctx.get()); |
189 | | - inverted_index_ctx->analyzer = analyzer.get(); |
190 | | - auto reader = create_reader(inverted_index_ctx->char_filter_map); |
| 184 | + InvertedIndexAnalyzerConfig config; |
| 185 | + config.analyzer_name = get_analyzer_name_from_properties(properties); |
| 186 | + config.parser_type = get_inverted_index_parser_type_from_string( |
| 187 | + get_parser_string_from_properties(properties)); |
| 188 | + config.parser_mode = get_parser_mode_string_from_properties(properties); |
| 189 | + config.lower_case = get_parser_lowercase_from_properties(properties); |
| 190 | + config.stop_words = get_parser_stopwords_from_properties(properties); |
| 191 | + config.char_filter_map = get_parser_char_filter_map_from_properties(properties); |
| 192 | + auto analyzer = create_analyzer(&config); |
| 193 | + auto reader = create_reader(config.char_filter_map); |
191 | 194 | reader->init(search_str.data(), static_cast<int32_t>(search_str.size()), true); |
192 | 195 | return get_analyse_result(reader, analyzer.get()); |
193 | 196 | } |
|
0 commit comments