Skip to content

Commit d4add24

Browse files
authored
Wildcard preparation 2 (iresearch-toolkit#579)
* WIP * WIP
1 parent 1605954 commit d4add24

17 files changed

+723
-680
lines changed

core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ set(IResearch_core_sources
132132
./search/multiterm_query.cpp
133133
./search/term_query.cpp
134134
./search/boolean_filter.cpp
135+
./search/boolean_query.cpp
135136
./search/ngram_similarity_filter.cpp
136137
./search/ngram_similarity_query.cpp
137138
./search/proxy_filter.cpp

core/search/boolean_filter.cpp

Lines changed: 1 addition & 263 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "exclusion.hpp"
3030
#include "min_match_disjunction.hpp"
3131
#include "prepared_state_visitor.hpp"
32+
#include "search/boolean_query.hpp"
3233

3334
namespace {
3435

@@ -45,273 +46,10 @@ std::pair<const irs::filter*, bool> optimize_not(const irs::Not& node) {
4546
return std::make_pair(inner, neg);
4647
}
4748

48-
template<bool Conjunction, typename It>
49-
irs::ScoreAdapters<irs::doc_iterator::ptr> MakeScoreAdapters(
50-
const irs::ExecutionContext& ctx, It begin, It end) {
51-
IRS_ASSERT(begin <= end);
52-
const size_t size = std::distance(begin, end);
53-
irs::ScoreAdapters<irs::doc_iterator::ptr> itrs;
54-
itrs.reserve(size);
55-
if (Conjunction || size > 1) {
56-
ctx.wand.root = false;
57-
// TODO(MBkkt) ctx.wand.strict = true;
58-
// We couldn't do this for few reasons:
59-
// 1. It's small chance that we will use just term iterator (or + eof)
60-
// 2. I'm not sure about precision
61-
}
62-
do {
63-
auto docs = (*begin)->execute(ctx);
64-
++begin;
65-
66-
// filter out empty iterators
67-
if (irs::doc_limits::eof(docs->value())) {
68-
if constexpr (Conjunction) {
69-
return {};
70-
} else {
71-
continue;
72-
}
73-
}
74-
75-
itrs.emplace_back(std::move(docs));
76-
} while (begin != end);
77-
78-
return itrs;
79-
}
80-
81-
// Returns disjunction iterator created from the specified queries
82-
template<typename QueryIterator, typename... Args>
83-
irs::doc_iterator::ptr make_disjunction(const irs::ExecutionContext& ctx,
84-
irs::ScoreMergeType merge_type,
85-
QueryIterator begin, QueryIterator end,
86-
Args&&... args) {
87-
IRS_ASSERT(begin <= end);
88-
const size_t size = std::distance(begin, end);
89-
// check the size before the execution
90-
if (0 == size) {
91-
// empty or unreachable search criteria
92-
return irs::doc_iterator::empty();
93-
}
94-
95-
auto itrs = MakeScoreAdapters<false>(ctx, begin, end);
96-
if (itrs.empty()) {
97-
return irs::doc_iterator::empty();
98-
}
99-
100-
return irs::ResoveMergeType(
101-
merge_type, ctx.scorers.buckets().size(),
102-
[&]<typename A>(A&& aggregator) -> irs::doc_iterator::ptr {
103-
using disjunction_t =
104-
irs::disjunction_iterator<irs::doc_iterator::ptr, A>;
105-
106-
return irs::MakeDisjunction<disjunction_t>(ctx.wand, std::move(itrs),
107-
std::move(aggregator),
108-
std::forward<Args>(args)...);
109-
});
110-
}
111-
112-
// Returns conjunction iterator created from the specified queries
113-
template<typename QueryIterator, typename... Args>
114-
irs::doc_iterator::ptr make_conjunction(const irs::ExecutionContext& ctx,
115-
irs::ScoreMergeType merge_type,
116-
QueryIterator begin, QueryIterator end,
117-
Args&&... args) {
118-
IRS_ASSERT(begin <= end);
119-
const size_t size = std::distance(begin, end);
120-
// check size before the execution
121-
switch (size) {
122-
case 0:
123-
return irs::doc_iterator::empty();
124-
case 1:
125-
return (*begin)->execute(ctx);
126-
}
127-
128-
auto itrs = MakeScoreAdapters<true>(ctx, begin, end);
129-
if (itrs.empty()) {
130-
return irs::doc_iterator::empty();
131-
}
132-
133-
return irs::ResoveMergeType(
134-
merge_type, ctx.scorers.buckets().size(),
135-
[&]<typename A>(A&& aggregator) -> irs::doc_iterator::ptr {
136-
return irs::MakeConjunction(ctx.wand, std::move(aggregator),
137-
std::move(itrs), std::forward<Args>(args)...);
138-
});
139-
}
140-
14149
} // namespace
14250

14351
namespace irs {
14452

145-
// Base class for boolean queries
146-
class BooleanQuery : public filter::prepared {
147-
public:
148-
using queries_t = ManagedVector<filter::prepared::ptr>;
149-
using iterator = queries_t::const_iterator;
150-
151-
BooleanQuery() noexcept : excl_{0} {}
152-
153-
doc_iterator::ptr execute(const ExecutionContext& ctx) const final {
154-
if (empty()) {
155-
return doc_iterator::empty();
156-
}
157-
158-
IRS_ASSERT(excl_);
159-
const auto excl_begin = this->excl_begin();
160-
const auto end = this->end();
161-
162-
auto incl = execute(ctx, begin(), excl_begin);
163-
164-
if (excl_begin == end) {
165-
return incl;
166-
}
167-
168-
// exclusion part does not affect scoring at all
169-
auto excl = ::make_disjunction(
170-
{.segment = ctx.segment, .scorers = Scorers::kUnordered, .ctx = ctx.ctx},
171-
irs::ScoreMergeType::kSum, excl_begin, end);
172-
173-
// got empty iterator for excluded
174-
if (doc_limits::eof(excl->value())) {
175-
// pure conjunction/disjunction
176-
return incl;
177-
}
178-
179-
return memory::make_managed<exclusion>(std::move(incl), std::move(excl));
180-
}
181-
182-
void visit(const irs::SubReader& segment, irs::PreparedStateVisitor& visitor,
183-
score_t boost) const final {
184-
boost *= this->boost();
185-
186-
if (!visitor.Visit(*this, boost)) {
187-
return;
188-
}
189-
190-
// FIXME(gnusi): visit exclude group?
191-
for (auto it = begin(), end = excl_begin(); it != end; ++it) {
192-
(*it)->visit(segment, visitor, boost);
193-
}
194-
}
195-
196-
void prepare(const PrepareContext& ctx, ScoreMergeType merge_type,
197-
std::span<const filter* const> incl,
198-
std::span<const filter* const> excl) {
199-
BooleanQuery::queries_t queries{{ctx.memory}};
200-
queries.reserve(incl.size() + excl.size());
201-
202-
// apply boost to the current node
203-
this->boost(ctx.boost);
204-
205-
// prepare included
206-
for (const auto* filter : incl) {
207-
queries.emplace_back(filter->prepare(ctx));
208-
}
209-
210-
// prepare excluded
211-
for (const auto* filter : excl) {
212-
// exclusion part does not affect scoring at all
213-
queries.emplace_back(filter->prepare({
214-
.index = ctx.index,
215-
.memory = ctx.memory,
216-
.ctx = ctx.ctx,
217-
}));
218-
}
219-
220-
// nothrow block
221-
queries_ = std::move(queries);
222-
excl_ = incl.size();
223-
merge_type_ = merge_type;
224-
}
225-
226-
iterator begin() const { return queries_.begin(); }
227-
iterator excl_begin() const { return begin() + excl_; }
228-
iterator end() const { return queries_.end(); }
229-
230-
bool empty() const { return queries_.empty(); }
231-
size_t size() const { return queries_.size(); }
232-
233-
protected:
234-
virtual doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
235-
iterator end) const = 0;
236-
237-
ScoreMergeType merge_type() const noexcept { return merge_type_; }
238-
239-
private:
240-
// 0..excl_-1 - included queries
241-
// excl_..queries.end() - excluded queries
242-
queries_t queries_;
243-
// index of the first excluded query
244-
size_t excl_;
245-
ScoreMergeType merge_type_{ScoreMergeType::kSum};
246-
};
247-
248-
// Represents a set of queries joined by "And"
249-
class AndQuery : public BooleanQuery {
250-
public:
251-
doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
252-
iterator end) const final {
253-
return ::make_conjunction(ctx, merge_type(), begin, end);
254-
}
255-
};
256-
257-
// Represents a set of queries joined by "Or"
258-
class OrQuery : public BooleanQuery {
259-
public:
260-
doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
261-
iterator end) const final {
262-
return ::make_disjunction(ctx, merge_type(), begin, end);
263-
}
264-
};
265-
266-
// Represents a set of queries joined by "Or" with the specified
267-
// minimum number of clauses that should satisfy criteria
268-
class MinMatchQuery : public BooleanQuery {
269-
public:
270-
explicit MinMatchQuery(size_t min_match_count) noexcept
271-
: min_match_count_{min_match_count} {
272-
IRS_ASSERT(min_match_count_ > 1);
273-
}
274-
275-
doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
276-
iterator end) const final {
277-
IRS_ASSERT(std::distance(begin, end) >= 0);
278-
const size_t size = size_t(std::distance(begin, end));
279-
280-
// 1 <= min_match_count
281-
size_t min_match_count = std::max(size_t(1), min_match_count_);
282-
283-
// check the size before the execution
284-
if (0 == size || min_match_count > size) {
285-
// empty or unreachable search criteria
286-
return doc_iterator::empty();
287-
} else if (min_match_count == size) {
288-
// pure conjunction
289-
return ::make_conjunction(ctx, merge_type(), begin, end);
290-
}
291-
292-
// min_match_count <= size
293-
min_match_count = std::min(size, min_match_count);
294-
295-
auto itrs = MakeScoreAdapters<false>(ctx, begin, end);
296-
if (itrs.empty()) {
297-
return irs::doc_iterator::empty();
298-
}
299-
300-
return ResoveMergeType(
301-
merge_type(), ctx.scorers.buckets().size(),
302-
[&]<typename A>(A&& aggregator) -> doc_iterator::ptr {
303-
// FIXME(gnusi): use FAST version
304-
using disjunction_t = min_match_iterator<doc_iterator::ptr, A>;
305-
306-
return MakeWeakDisjunction<disjunction_t, A>(
307-
ctx.wand, std::move(itrs), min_match_count, std::move(aggregator));
308-
});
309-
}
310-
311-
private:
312-
size_t min_match_count_;
313-
};
314-
31553
size_t boolean_filter::hash() const noexcept {
31654
size_t seed = 0;
31755

0 commit comments

Comments
 (0)