Skip to content

Commit 1bbb464

Browse files
authored
[fix](inverted-index) Fix boolean query AllScorer combination handling (#60237)
1 parent 34e239a commit 1bbb464

File tree

5 files changed

+525
-26
lines changed

5 files changed

+525
-26
lines changed

be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp

Lines changed: 65 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ std::optional<CombinationMethod> OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
112112
} else if (adjusted_minimum == 1) {
113113
return Required {scorer_union(std::move(should_scorers), combiner)};
114114
} else if (adjusted_minimum == num_of_should_scorers) {
115-
must_scorers.swap(should_scorers);
115+
for (auto& scorer : should_scorers) {
116+
must_scorers.push_back(std::move(scorer));
117+
}
116118
return Ignored {};
117119
} else {
118120
return Required {scorer_disjunction(std::move(should_scorers), combiner, adjusted_minimum)};
@@ -130,43 +132,83 @@ ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
130132
return into_box_scorer(std::move(specialized_scorer), do_nothing);
131133
}
132134

135+
template <typename ScoreCombinerPtrT>
136+
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
137+
std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
138+
if (must_scorers.empty()) {
139+
if (must_num_all_scorers > 0) {
140+
return std::make_shared<AllScorer>(_max_doc);
141+
}
142+
return nullptr;
143+
}
144+
return make_intersect_scorers(std::move(must_scorers), _max_doc);
145+
}
146+
147+
template <typename ScoreCombinerPtrT>
148+
template <typename CombinerT>
149+
SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::effective_should_scorer_for_union(
150+
SpecializedScorer should_scorer, size_t should_num_all_scorers, CombinerT combiner) {
151+
if (should_num_all_scorers > 0) {
152+
if (_enable_scoring) {
153+
std::vector<ScorerPtr> scorers;
154+
scorers.push_back(into_box_scorer(std::move(should_scorer), combiner));
155+
scorers.push_back(std::make_shared<AllScorer>(_max_doc));
156+
return make_buffered_union(std::move(scorers), combiner);
157+
} else {
158+
return std::make_shared<AllScorer>(_max_doc);
159+
}
160+
}
161+
return should_scorer;
162+
}
163+
133164
template <typename ScoreCombinerPtrT>
134165
template <typename CombinerT>
135166
SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::build_positive_opt(
136167
CombinationMethod& should_opt, std::vector<ScorerPtr> must_scorers, CombinerT combiner,
137-
size_t num_all_scorers) {
138-
const bool has_must = !must_scorers.empty();
168+
const AllAndEmptyScorerCounts& must_special_counts,
169+
const AllAndEmptyScorerCounts& should_special_counts) {
170+
size_t num_all_scorers =
171+
must_special_counts.num_all_scorers + should_special_counts.num_all_scorers;
139172
if (std::holds_alternative<Ignored>(should_opt)) {
140-
if (has_must) {
141-
return make_intersect_scorers(std::move(must_scorers), _max_doc);
142-
}
143-
if (num_all_scorers > 0) {
144-
return std::make_shared<AllScorer>(_max_doc);
173+
ScorerPtr must_scorer = effective_must_scorer(std::move(must_scorers), num_all_scorers);
174+
if (must_scorer) {
175+
return must_scorer;
145176
}
146177
return std::make_shared<EmptyScorer>();
147178
}
148179

149180
if (std::holds_alternative<Optional>(should_opt)) {
150181
auto& opt = std::get<Optional>(should_opt);
151-
if (has_must) {
152-
auto must_scorer = make_intersect_scorers(std::move(must_scorers), _max_doc);
153-
if (_enable_scoring) {
154-
auto should_boxed = into_box_scorer(std::move(opt.scorer), combiner);
155-
return make_required_optional_scorer(must_scorer, should_boxed, combiner);
156-
} else {
157-
return must_scorer;
158-
}
182+
ScorerPtr must_scorer =
183+
effective_must_scorer(std::move(must_scorers), must_special_counts.num_all_scorers);
184+
185+
if (!must_scorer) {
186+
return effective_should_scorer_for_union(
187+
std::move(opt.scorer), should_special_counts.num_all_scorers, combiner);
188+
}
189+
190+
if (_enable_scoring) {
191+
auto should_boxed = into_box_scorer(std::move(opt.scorer), combiner);
192+
return make_required_optional_scorer(must_scorer, should_boxed, combiner);
193+
} else {
194+
return must_scorer;
159195
}
160-
return opt.scorer;
161196
}
162197

163198
if (std::holds_alternative<Required>(should_opt)) {
164199
auto& req = std::get<Required>(should_opt);
165-
if (has_must) {
166-
must_scorers.push_back(into_box_scorer(std::move(req.scorer), combiner));
167-
return make_intersect_scorers(std::move(must_scorers), _max_doc);
200+
ScorerPtr must_scorer =
201+
effective_must_scorer(std::move(must_scorers), must_special_counts.num_all_scorers);
202+
203+
if (!must_scorer) {
204+
return req.scorer;
168205
}
169-
return req.scorer;
206+
207+
auto should_boxed = into_box_scorer(std::move(req.scorer), combiner);
208+
std::vector<ScorerPtr> scorers;
209+
scorers.push_back(std::move(must_scorer));
210+
scorers.push_back(std::move(should_boxed));
211+
return make_intersect_scorers(std::move(scorers), _max_doc);
170212
}
171213

172214
return std::make_shared<EmptyScorer>();
@@ -200,10 +242,9 @@ SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::complex_scorer(
200242
}
201243

202244
ScorerPtr exclude_opt = build_exclude_opt(std::move(must_not_scorers));
203-
size_t total_all_scorers =
204-
must_special_counts.num_all_scorers + should_special_counts.num_all_scorers;
205245
SpecializedScorer positive_opt =
206-
build_positive_opt(*should_opt, std::move(must_scorers), combiner, total_all_scorers);
246+
build_positive_opt(*should_opt, std::move(must_scorers), combiner, must_special_counts,
247+
should_special_counts);
207248
if (exclude_opt) {
208249
ScorerPtr positive_boxed = into_box_scorer(std::move(positive_opt), combiner);
209250
return make_exclude(std::move(positive_boxed), std::move(exclude_opt));

be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,20 @@ class OccurBooleanWeight : public Weight {
6363
std::vector<ScorerPtr> should_scorers,
6464
CombinerT combiner, size_t num_all_scorers);
6565
ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers);
66+
67+
ScorerPtr effective_must_scorer(std::vector<ScorerPtr> must_scorers,
68+
size_t must_num_all_scorers);
69+
70+
template <typename CombinerT>
71+
SpecializedScorer effective_should_scorer_for_union(SpecializedScorer should_scorer,
72+
size_t should_num_all_scorers,
73+
CombinerT combiner);
74+
6675
template <typename CombinerT>
6776
SpecializedScorer build_positive_opt(CombinationMethod& should_opt,
6877
std::vector<ScorerPtr> must_scorers, CombinerT combiner,
69-
size_t num_all_scorers = 0);
78+
const AllAndEmptyScorerCounts& must_special_counts,
79+
const AllAndEmptyScorerCounts& should_special_counts);
7080

7181
template <typename CombinerT>
7282
SpecializedScorer scorer_union(std::vector<ScorerPtr> scorers, CombinerT combiner);

be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,19 @@ class RegexpQuery : public Query {
3232
~RegexpQuery() override = default;
3333

3434
WeightPtr weight(bool enable_scoring) override {
35+
auto pattern = make_exact_match(_pattern);
3536
return std::make_shared<RegexpWeight>(std::move(_context), std::move(_field),
36-
std::move(_pattern), enable_scoring, _nullable);
37+
std::move(pattern), enable_scoring, _nullable);
3738
}
3839

3940
private:
41+
// Returns a pattern that must match the whole input: `pattern` itself when it
// is already anchored on both ends, otherwise "^(" + pattern + ")$".
//
// A pattern is treated as already anchored only if all of the following hold:
//   - it starts with '^' and ends with '$';
//   - the trailing '$' is not escaped (it is preceded by an even number of
//     backslashes), because in "^abc\$" the '$' is a literal dollar sign;
//   - it has no alternation '|' outside of any group or character class,
//     because in "^a|b$" each branch is anchored on one side only.
// Wrapping a pattern that is in fact fully anchored does not change what it
// matches, so the checks err on the side of wrapping.
//
// @param pattern  regular expression as supplied by the caller.
// @return         the whole-input form of `pattern`.
static std::string make_exact_match(const std::string& pattern) {
    const auto wrapped = [&pattern]() { return "^(" + pattern + ")$"; };

    if (pattern.size() < 2 || pattern.front() != '^' || pattern.back() != '$') {
        return wrapped();
    }

    // Count the backslashes directly in front of the final '$'; an odd count
    // means the '$' itself is escaped and therefore not an anchor.
    size_t trailing_backslashes = 0;
    for (size_t i = pattern.size() - 1; i > 0 && pattern[i - 1] == '\\'; --i) {
        ++trailing_backslashes;
    }
    if (trailing_backslashes % 2 == 1) {
        return wrapped();
    }

    // Look for a '|' that is not escaped, not inside [...] and not inside (...).
    bool escaped = false;
    bool in_char_class = false;
    int group_depth = 0;
    for (const char c : pattern) {
        if (escaped) {
            escaped = false;
            continue;
        }
        if (c == '\\') {
            escaped = true;
            continue;
        }
        if (in_char_class) {
            if (c == ']') {
                in_char_class = false;
            }
            continue;
        }
        if (c == '[') {
            in_char_class = true;
        } else if (c == '(') {
            ++group_depth;
        } else if (c == ')') {
            --group_depth;
        } else if (c == '|' && group_depth <= 0) {
            return wrapped();
        }
    }

    return pattern;
}
47+
4048
IndexQueryContextPtr _context;
4149

4250
std::wstring _field;

0 commit comments

Comments
 (0)