Skip to content

Commit eac615d

Browse files
committed
[feature](search) introduce lucene bool mode for search function
1 parent 73fe371 commit eac615d

File tree

7 files changed

+1320
-25
lines changed

7 files changed

+1320
-25
lines changed

be/src/vec/functions/function_search.cpp

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,8 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
317317
// Aligned with FE QsClauseType enum - uses enum.name() as clause_type
318318
FunctionSearch::ClauseTypeCategory FunctionSearch::get_clause_type_category(
319319
const std::string& clause_type) const {
320-
if (clause_type == "AND" || clause_type == "OR" || clause_type == "NOT") {
320+
if (clause_type == "AND" || clause_type == "OR" || clause_type == "NOT" ||
321+
clause_type == "OCCUR_BOOLEAN") {
321322
return ClauseTypeCategory::COMPOUND;
322323
} else if (clause_type == "TERM" || clause_type == "PREFIX" || clause_type == "WILDCARD" ||
323324
clause_type == "REGEXP" || clause_type == "RANGE" || clause_type == "LIST" ||
@@ -377,6 +378,7 @@ InvertedIndexQueryType FunctionSearch::clause_type_to_query_type(
377378
{"AND", InvertedIndexQueryType::BOOLEAN_QUERY},
378379
{"OR", InvertedIndexQueryType::BOOLEAN_QUERY},
379380
{"NOT", InvertedIndexQueryType::BOOLEAN_QUERY},
381+
{"OCCUR_BOOLEAN", InvertedIndexQueryType::BOOLEAN_QUERY},
380382

381383
// Non-tokenized queries (exact matching, pattern matching)
382384
{"TERM", InvertedIndexQueryType::EQUAL_QUERY},
@@ -406,6 +408,20 @@ InvertedIndexQueryType FunctionSearch::clause_type_to_query_type(
406408
return InvertedIndexQueryType::EQUAL_QUERY;
407409
}
408410

411+
// Map Thrift TSearchOccur to query_v2::Occur
412+
static query_v2::Occur map_thrift_occur(TSearchOccur::type thrift_occur) {
413+
switch (thrift_occur) {
414+
case TSearchOccur::MUST:
415+
return query_v2::Occur::MUST;
416+
case TSearchOccur::SHOULD:
417+
return query_v2::Occur::SHOULD;
418+
case TSearchOccur::MUST_NOT:
419+
return query_v2::Occur::MUST_NOT;
420+
default:
421+
return query_v2::Occur::MUST;
422+
}
423+
}
424+
409425
Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
410426
const std::shared_ptr<IndexQueryContext>& context,
411427
FieldReaderResolver& resolver,
@@ -418,6 +434,38 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
418434
}
419435

420436
const std::string& clause_type = clause.clause_type;
437+
438+
// Handle OCCUR_BOOLEAN - Lucene-style boolean query with MUST/SHOULD/MUST_NOT
439+
if (clause_type == "OCCUR_BOOLEAN") {
440+
auto builder = segment_v2::inverted_index::query_v2::create_occur_boolean_query_builder();
441+
442+
// Set minimum_should_match if specified
443+
if (clause.__isset.minimum_should_match) {
444+
builder->set_minimum_number_should_match(clause.minimum_should_match);
445+
}
446+
447+
if (clause.__isset.children) {
448+
for (const auto& child_clause : clause.children) {
449+
query_v2::QueryPtr child_query;
450+
std::string child_binding_key;
451+
RETURN_IF_ERROR(build_query_recursive(child_clause, context, resolver, &child_query,
452+
&child_binding_key));
453+
454+
// Determine occur type from child clause
455+
query_v2::Occur occur = query_v2::Occur::MUST; // default
456+
if (child_clause.__isset.occur) {
457+
occur = map_thrift_occur(child_clause.occur);
458+
}
459+
460+
builder->add(child_query, occur);
461+
}
462+
}
463+
464+
*out = builder->build();
465+
return Status::OK();
466+
}
467+
468+
// Handle standard boolean operators (AND/OR/NOT)
421469
if (clause_type == "AND" || clause_type == "OR" || clause_type == "NOT") {
422470
query_v2::OperatorType op = query_v2::OperatorType::OP_AND;
423471
if (clause_type == "OR") {

0 commit comments

Comments
 (0)