Skip to content

Commit 8e105c4

Browse files
zzzxl1993 and Your Name
authored and committed
[fix](inverted index) fix multi-position phrase query handling in MultiPhraseQuery (#57993)
apache/doris-website#3114
1 parent 0cb509d commit 8e105c4

File tree

8 files changed

+258
-56
lines changed

8 files changed

+258
-56
lines changed

be/src/olap/rowset/segment_v2/inverted_index/query/query_helper.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,34 @@ bool QueryHelper::is_simple_phrase(const std::vector<TermInfo>& term_infos) {
7474
[](const auto& term_info) { return term_info.is_single_term(); });
7575
}
7676

77+
std::vector<TermInfo> QueryHelper::build_phrase_term_infos(const std::vector<TermInfo>& src) {
78+
std::vector<TermInfo> dst;
79+
dst.reserve(src.size());
80+
size_t idx = 0;
81+
while (idx < src.size()) {
82+
int32_t pos = src[idx].position;
83+
std::vector<std::string> group_terms;
84+
while (idx < src.size() && src[idx].position == pos) {
85+
const auto& info = src[idx];
86+
if (info.is_single_term()) {
87+
group_terms.emplace_back(info.get_single_term());
88+
} else {
89+
const auto& terms = info.get_multi_terms();
90+
group_terms.insert(group_terms.end(), terms.begin(), terms.end());
91+
}
92+
++idx;
93+
}
94+
TermInfo t;
95+
t.position = pos;
96+
if (group_terms.size() == 1) {
97+
t.term = std::move(group_terms[0]);
98+
} else {
99+
t.term = std::move(group_terms);
100+
}
101+
dst.emplace_back(std::move(t));
102+
}
103+
return dst;
104+
}
105+
77106
#include "common/compile_check_end.h"
78107
} // namespace doris::segment_v2

be/src/olap/rowset/segment_v2/inverted_index/query/query_helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
#pragma once
1919

2020
#include <memory>
21+
#include <vector>
2122

2223
#include "olap/rowset/segment_v2/inverted_index/query/query.h"
24+
#include "olap/rowset/segment_v2/inverted_index/query/query_info.h"
2325

2426
namespace doris::segment_v2 {
2527
#include "common/compile_check_begin.h"
@@ -39,6 +41,7 @@ class QueryHelper {
3941
const DocRange& doc_range);
4042

4143
static bool is_simple_phrase(const std::vector<TermInfo>& term_infos);
44+
static std::vector<TermInfo> build_phrase_term_infos(const std::vector<TermInfo>& src);
4245
};
4346

4447
#include "common/compile_check_end.h"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "olap/rowset/segment_v2/inverted_index_common.h"
19+
20+
#include <CLucene.h>
21+
22+
namespace doris::segment_v2 {
23+
24+
void DirectoryDeleter::operator()(lucene::store::Directory* p) const {
25+
if (p != nullptr) {
26+
_CLDECDELETE(p);
27+
}
28+
}
29+
30+
void TermDeleter::operator()(lucene::index::Term* p) const {
31+
if (p != nullptr) {
32+
_CLDECDELETE(p);
33+
}
34+
}
35+
36+
void CLuceneDeleter::operator()(lucene::index::TermDocs* p) const {
37+
if (p != nullptr) {
38+
_CLLDELETE(p);
39+
}
40+
}
41+
42+
} // namespace doris::segment_v2

be/src/olap/rowset/segment_v2/inverted_index_common.h

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -17,47 +17,50 @@
1717

1818
#pragma once
1919

20-
#include <CLucene.h> // IWYU pragma: keep
21-
20+
#include <exception>
2221
#include <memory>
22+
#include <string>
23+
24+
namespace lucene {
25+
namespace store {
26+
class Directory;
27+
} // namespace store
28+
29+
namespace index {
30+
class Term;
31+
class TermDocs;
32+
class TermPositions;
33+
class IndexReader;
34+
} // namespace index
35+
} // namespace lucene
2336

24-
#include "common/logging.h"
37+
class CLuceneError;
2538

2639
namespace doris::segment_v2 {
2740

2841
struct DirectoryDeleter {
29-
void operator()(lucene::store::Directory* ptr) const { _CLDECDELETE(ptr); }
42+
void operator()(lucene::store::Directory* p) const;
3043
};
3144

3245
struct TermDeleter {
33-
void operator()(lucene::index::Term* p) const { _CLDECDELETE(p); }
46+
void operator()(lucene::index::Term* p) const;
3447
};
3548
using TermPtr = std::unique_ptr<lucene::index::Term, TermDeleter>;
3649

3750
template <typename... Args>
38-
TermPtr make_term_ptr(Args&&... args) {
39-
return TermPtr(new lucene::index::Term(std::forward<Args>(args)...));
40-
}
51+
TermPtr make_term_ptr(Args&&... args);
4152

4253
struct CLuceneDeleter {
43-
void operator()(lucene::index::TermDocs* p) const {
44-
if (p) {
45-
_CLDELETE(p);
46-
}
47-
}
54+
void operator()(lucene::index::TermDocs* p) const;
4855
};
4956
using TermDocsPtr = std::unique_ptr<lucene::index::TermDocs, CLuceneDeleter>;
5057
using TermPositionsPtr = std::unique_ptr<lucene::index::TermPositions, CLuceneDeleter>;
5158

5259
template <typename... Args>
53-
TermDocsPtr make_term_doc_ptr(lucene::index::IndexReader* reader, Args&&... args) {
54-
return TermDocsPtr(reader->termDocs(std::forward<Args>(args)...));
55-
}
60+
TermDocsPtr make_term_doc_ptr(lucene::index::IndexReader* reader, Args&&... args);
5661

5762
template <typename... Args>
58-
TermPositionsPtr make_term_positions_ptr(lucene::index::IndexReader* reader, Args&&... args) {
59-
return TermPositionsPtr(reader->termPositions(std::forward<Args>(args)...));
60-
}
63+
TermPositionsPtr make_term_positions_ptr(lucene::index::IndexReader* reader, Args&&... args);
6164

6265
struct ErrorContext {
6366
std::string err_msg;
@@ -71,22 +74,7 @@ concept HasClose = requires(T t) {
7174

7275
template <typename PtrType>
7376
requires HasClose<PtrType>
74-
void finally_close(PtrType& resource, ErrorContext& error_context) {
75-
if (resource) {
76-
try {
77-
resource->close();
78-
} catch (CLuceneError& err) {
79-
error_context.eptr = std::current_exception();
80-
error_context.err_msg.append("Error occurred while closing resource: ");
81-
error_context.err_msg.append(err.what());
82-
LOG(ERROR) << error_context.err_msg;
83-
} catch (...) {
84-
error_context.eptr = std::current_exception();
85-
error_context.err_msg.append("Error occurred while closing resource");
86-
LOG(ERROR) << error_context.err_msg;
87-
}
88-
}
89-
}
77+
void finally_close(PtrType& resource, ErrorContext& error_context);
9078

9179
#if defined(__clang__)
9280
#pragma clang diagnostic push
@@ -126,4 +114,6 @@ void finally_close(PtrType& resource, ErrorContext& error_context) {
126114
#pragma clang diagnostic pop
127115
#endif
128116

129-
} // namespace doris::segment_v2
117+
} // namespace doris::segment_v2
118+
119+
#include "inverted_index_common_impl.h"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include <CLucene.h>
21+
22+
#include "common/logging.h"
23+
#include "olap/rowset/segment_v2/inverted_index_common.h"
24+
25+
namespace doris::segment_v2 {
26+
27+
template <typename... Args>
28+
TermPtr make_term_ptr(Args&&... args) {
29+
return TermPtr(new lucene::index::Term(std::forward<Args>(args)...));
30+
}
31+
32+
template <typename... Args>
33+
TermDocsPtr make_term_doc_ptr(lucene::index::IndexReader* reader, Args&&... args) {
34+
return TermDocsPtr(reader->termDocs(std::forward<Args>(args)...));
35+
}
36+
37+
template <typename... Args>
38+
TermPositionsPtr make_term_positions_ptr(lucene::index::IndexReader* reader, Args&&... args) {
39+
return TermPositionsPtr(reader->termPositions(std::forward<Args>(args)...));
40+
}
41+
42+
template <typename PtrType>
43+
requires HasClose<PtrType>
44+
void finally_close(PtrType& resource, ErrorContext& error_context) {
45+
if (resource) {
46+
try {
47+
resource->close();
48+
} catch (CLuceneError& err) {
49+
error_context.eptr = std::current_exception();
50+
error_context.err_msg.append("Error occurred while closing resource: ");
51+
error_context.err_msg.append(err.what());
52+
LOG(ERROR) << error_context.err_msg;
53+
} catch (...) {
54+
error_context.eptr = std::current_exception();
55+
error_context.err_msg.append("Error occurred while closing resource");
56+
LOG(ERROR) << error_context.err_msg;
57+
}
58+
}
59+
}
60+
61+
} // namespace doris::segment_v2

be/src/vec/functions/function_search.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -566,22 +566,22 @@ Status FunctionSearch::build_leaf_query(const TSearchClause& clause,
566566
<< "', returning empty BitSetQuery";
567567
*out = std::make_shared<query_v2::BitSetQuery>(roaring::Roaring());
568568
return Status::OK();
569-
} else if (term_infos.size() == 1) {
570-
if (term_infos.size() == 1) {
571-
const auto& term_info = term_infos[0];
572-
if (term_info.is_single_term()) {
573-
std::wstring term_wstr =
574-
StringHelper::to_wstring(term_info.get_single_term());
575-
*out = std::make_shared<query_v2::TermQuery>(context, field_wstr,
576-
term_wstr);
577-
} else {
578-
query_v2::BooleanQuery::Builder builder(query_v2::OperatorType::OP_OR);
579-
for (const auto& term : term_info.get_multi_terms()) {
580-
std::wstring term_wstr = StringHelper::to_wstring(term);
581-
builder.add(make_term_query(term_wstr), binding.binding_key);
582-
}
583-
*out = builder.build();
569+
}
570+
571+
std::vector<TermInfo> phrase_term_infos =
572+
QueryHelper::build_phrase_term_infos(term_infos);
573+
if (phrase_term_infos.size() == 1) {
574+
const auto& term_info = term_infos[0];
575+
if (term_info.is_single_term()) {
576+
std::wstring term_wstr = StringHelper::to_wstring(term_info.get_single_term());
577+
*out = std::make_shared<query_v2::TermQuery>(context, field_wstr, term_wstr);
578+
} else {
579+
query_v2::BooleanQuery::Builder builder(query_v2::OperatorType::OP_OR);
580+
for (const auto& term : term_info.get_multi_terms()) {
581+
std::wstring term_wstr = StringHelper::to_wstring(term);
582+
builder.add(make_term_query(term_wstr), binding.binding_key);
584583
}
584+
*out = builder.build();
585585
}
586586
} else {
587587
if (QueryHelper::is_simple_phrase(term_infos)) {

be/test/olap/rowset/segment_v2/index_reader_helper_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
using namespace doris;
3434
using namespace doris::segment_v2;
3535

36-
class MockIndexReader : public IndexReader {
36+
class MockIndexReader : public segment_v2::IndexReader {
3737
public:
3838
MockIndexReader(IndexType type, uint64_t id) : _type(type), _id(id) {}
3939

0 commit comments

Comments
 (0)