Skip to content

Commit b4e78ba

Browse files
authored
[feature](inverted index) Implement es-like boolean query (#58545)
1 parent 83aafd6 commit b4e78ba

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+5261
-456
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include <algorithm>
21+
#include <memory>
22+
23+
#include "olap/rowset/segment_v2/inverted_index/query_v2/query.h"
24+
#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
25+
#include "olap/rowset/segment_v2/inverted_index/query_v2/weight.h"
26+
27+
namespace doris::segment_v2::inverted_index::query_v2 {
28+
29+
class AllScorer;
30+
class AllWeight;
31+
class AllQuery;
32+
33+
using AllScorerPtr = std::shared_ptr<AllScorer>;
34+
using AllWeightPtr = std::shared_ptr<AllWeight>;
35+
using AllQueryPtr = std::shared_ptr<AllQuery>;
36+
37+
class AllScorer : public Scorer {
38+
public:
39+
explicit AllScorer(uint32_t max_doc) : _max_doc(max_doc) {
40+
if (_max_doc == 0) {
41+
_doc = TERMINATED;
42+
} else {
43+
_doc = 0;
44+
}
45+
}
46+
47+
~AllScorer() override = default;
48+
49+
uint32_t doc() const override { return _doc; }
50+
51+
uint32_t advance() override {
52+
if (_doc == TERMINATED) {
53+
return TERMINATED;
54+
}
55+
if (_doc + 1 >= _max_doc) {
56+
_doc = TERMINATED;
57+
return TERMINATED;
58+
}
59+
++_doc;
60+
return _doc;
61+
}
62+
63+
uint32_t seek(uint32_t target) override {
64+
if (_doc == TERMINATED) {
65+
return TERMINATED;
66+
}
67+
if (target >= _max_doc) {
68+
_doc = TERMINATED;
69+
return TERMINATED;
70+
}
71+
_doc = std::max(_doc, target);
72+
return _doc;
73+
}
74+
75+
float score() override { return 1.0F; }
76+
77+
uint32_t size_hint() const override { return _max_doc; }
78+
79+
private:
80+
uint32_t _max_doc = 0;
81+
uint32_t _doc = TERMINATED;
82+
};
83+
84+
class AllWeight : public Weight {
85+
public:
86+
explicit AllWeight(uint32_t max_doc) : _max_doc(max_doc) {}
87+
88+
~AllWeight() override = default;
89+
90+
ScorerPtr scorer(const QueryExecutionContext& context) override {
91+
return std::make_shared<AllScorer>(_max_doc);
92+
}
93+
94+
private:
95+
uint32_t _max_doc = 0;
96+
};
97+
98+
class AllQuery : public Query {
99+
public:
100+
explicit AllQuery(uint32_t max_doc) : _max_doc(max_doc) {}
101+
102+
~AllQuery() override = default;
103+
104+
WeightPtr weight(bool /*enable_scoring*/) override {
105+
return std::make_shared<AllWeight>(_max_doc);
106+
}
107+
108+
private:
109+
uint32_t _max_doc = 0;
110+
};
111+
112+
} // namespace doris::segment_v2::inverted_index::query_v2
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include <vector>
21+
22+
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
23+
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_query.h"
24+
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/operator_boolean_query.h"
25+
#include "olap/rowset/segment_v2/inverted_index/query_v2/query.h"
26+
27+
namespace doris::segment_v2::inverted_index::query_v2 {
28+
29+
class OccurBooleanQueryBuilder {
30+
public:
31+
OccurBooleanQueryBuilder() = default;
32+
~OccurBooleanQueryBuilder() = default;
33+
34+
void add(const QueryPtr& query, Occur occur) { _sub_queries.emplace_back(occur, query); }
35+
36+
void set_minimum_number_should_match(size_t value) { _minimum_number_should_match = value; }
37+
38+
QueryPtr build() {
39+
if (_minimum_number_should_match.has_value()) {
40+
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
41+
_minimum_number_should_match.value());
42+
}
43+
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries));
44+
}
45+
46+
private:
47+
std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
48+
std::optional<size_t> _minimum_number_should_match;
49+
};
50+
51+
using OccurBooleanQueryBuilderPtr = std::shared_ptr<OccurBooleanQueryBuilder>;
52+
53+
class OperatorBooleanQueryBuilder {
54+
public:
55+
OperatorBooleanQueryBuilder(OperatorType type) : _type(type) {}
56+
~OperatorBooleanQueryBuilder() = default;
57+
58+
void add(const QueryPtr& query, std::string binding_key = {}) {
59+
_sub_queries.emplace_back(query);
60+
_binding_keys.emplace_back(std::move(binding_key));
61+
}
62+
63+
QueryPtr build() {
64+
return std::make_shared<OperatorBooleanQuery>(_type, std::move(_sub_queries),
65+
std::move(_binding_keys));
66+
}
67+
68+
private:
69+
OperatorType _type;
70+
std::vector<QueryPtr> _sub_queries;
71+
std::vector<std::string> _binding_keys;
72+
};
73+
74+
using OperatorBooleanQueryBuilderPtr = std::shared_ptr<OperatorBooleanQueryBuilder>;
75+
76+
// Convenience factory: returns a shared_ptr to a default-constructed
// OccurBooleanQueryBuilder.
inline OccurBooleanQueryBuilderPtr create_occur_boolean_query_builder() {
    OccurBooleanQueryBuilderPtr builder = std::make_shared<OccurBooleanQueryBuilder>();
    return builder;
}
79+
80+
// Convenience factory: returns a shared_ptr to an OperatorBooleanQueryBuilder
// constructed for the given operator type.
inline OperatorBooleanQueryBuilderPtr create_operator_boolean_query_builder(OperatorType type) {
    OperatorBooleanQueryBuilderPtr builder = std::make_shared<OperatorBooleanQueryBuilder>(type);
    return builder;
}
83+
84+
} // namespace doris::segment_v2::inverted_index::query_v2
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
namespace doris::segment_v2::inverted_index::query_v2 {
21+
22+
// Clause kind attached to each sub-query of an occur-style boolean query.
// The numeric values are fixed explicitly.
// NOTE(review): presumably mirrors the must / should / must_not clauses of an
// Elasticsearch-style bool query -- confirm against OccurBooleanWeight.
enum class Occur {
    MUST = 0,
    SHOULD = 1,
    MUST_NOT = 2,
};
23+
24+
} // namespace doris::segment_v2::inverted_index::query_v2
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
21+
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h"
22+
#include "olap/rowset/segment_v2/inverted_index/query_v2/query.h"
23+
#include "olap/rowset/segment_v2/inverted_index/query_v2/score_combiner.h"
24+
25+
namespace doris::segment_v2::inverted_index::query_v2 {
26+
27+
class OccurBooleanQuery;
28+
using OccurBooleanQueryPtr = std::shared_ptr<OccurBooleanQuery>;
29+
30+
class OccurBooleanQuery : public Query {
31+
public:
32+
explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses)
33+
: _sub_queries(std::move(clauses)),
34+
_minimum_number_should_match(compute_default_minimum_should_match(_sub_queries)) {}
35+
36+
OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
37+
size_t minimum_number_should_match)
38+
: _sub_queries(std::move(clauses)),
39+
_minimum_number_should_match(minimum_number_should_match) {}
40+
41+
~OccurBooleanQuery() override = default;
42+
43+
WeightPtr weight(bool enable_scoring) override {
44+
std::vector<std::pair<Occur, WeightPtr>> sub_weights;
45+
sub_weights.reserve(_sub_queries.size());
46+
for (const auto& [occur, query] : _sub_queries) {
47+
sub_weights.emplace_back(occur, query->weight(enable_scoring));
48+
}
49+
return std::make_shared<OccurBooleanWeight<SumCombinerPtr>>(
50+
std::move(sub_weights), _minimum_number_should_match, enable_scoring,
51+
std::make_shared<SumCombiner>());
52+
}
53+
54+
const std::vector<std::pair<Occur, QueryPtr>>& clauses() const { return _sub_queries; }
55+
size_t minimum_number_should_match() const { return _minimum_number_should_match; }
56+
57+
private:
58+
static size_t compute_default_minimum_should_match(
59+
const std::vector<std::pair<Occur, QueryPtr>>& clauses) {
60+
size_t minimum_required = 0;
61+
for (const auto& [occur, _] : clauses) {
62+
if (occur == Occur::SHOULD) {
63+
minimum_required = 1;
64+
} else if (occur == Occur::MUST || occur == Occur::MUST_NOT) {
65+
return 0;
66+
}
67+
}
68+
return minimum_required;
69+
}
70+
71+
std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
72+
size_t _minimum_number_should_match = 0;
73+
};
74+
75+
} // namespace doris::segment_v2::inverted_index::query_v2

0 commit comments

Comments
 (0)