Skip to content

Commit 38e7ffd

Browse files
authored
[refactor](predicate) Normalize predicates generation (#59187)
This PR refines predicate generation. Previously, predicates were generated in ScanOperator only for OlapTable and then pushed down to TabletReader. For other table types, however, Exprs were simply pushed down as-is and converted to predicates inside each table type's own file reader. This duplication added complexity and maintenance overhead. With this PR, predicates are generated in ScanOperator for all table types.
1 parent 4a45bcb commit 38e7ffd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1183
-1315
lines changed

be/src/exec/olap_utils.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,23 +104,23 @@ inline SQLFilterOp to_olap_filter_type(TExprOpcode::type type, bool opposite) {
104104
return FILTER_IN;
105105
}
106106

107-
inline SQLFilterOp to_olap_filter_type(const std::string& function_name, bool opposite) {
107+
inline SQLFilterOp to_olap_filter_type(const std::string& function_name) {
108108
if (function_name == "lt") {
109-
return opposite ? FILTER_LARGER : FILTER_LESS;
109+
return FILTER_LESS;
110110
} else if (function_name == "gt") {
111-
return opposite ? FILTER_LESS : FILTER_LARGER;
111+
return FILTER_LARGER;
112112
} else if (function_name == "le") {
113-
return opposite ? FILTER_LARGER_OR_EQUAL : FILTER_LESS_OR_EQUAL;
113+
return FILTER_LESS_OR_EQUAL;
114114
} else if (function_name == "ge") {
115-
return opposite ? FILTER_LESS_OR_EQUAL : FILTER_LARGER_OR_EQUAL;
115+
return FILTER_LARGER_OR_EQUAL;
116116
} else if (function_name == "eq") {
117-
return opposite ? FILTER_NOT_IN : FILTER_IN;
117+
return FILTER_IN;
118118
} else if (function_name == "ne") {
119-
return opposite ? FILTER_IN : FILTER_NOT_IN;
119+
return FILTER_NOT_IN;
120120
} else if (function_name == "in") {
121-
return opposite ? FILTER_NOT_IN : FILTER_IN;
121+
return FILTER_IN;
122122
} else if (function_name == "not_in") {
123-
return opposite ? FILTER_IN : FILTER_NOT_IN;
123+
return FILTER_NOT_IN;
124124
} else {
125125
DCHECK(false) << "Function Name: " << function_name;
126126
return FILTER_IN;

be/src/exprs/create_predicate_function.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,13 @@ std::shared_ptr<ColumnPredicate> create_olap_column_predicate(
268268
const TabletColumn* column, bool) {
269269
// currently only support like predicate
270270
if constexpr (PT == TYPE_CHAR) {
271-
return LikeColumnPredicate<TYPE_CHAR>::create_shared(
272-
filter->_opposite, column_id, filter->_fn_ctx, filter->_string_param);
271+
return LikeColumnPredicate<TYPE_CHAR>::create_shared(filter->_opposite, column_id,
272+
column->name(), filter->_fn_ctx,
273+
filter->_string_param);
273274
} else if constexpr (PT == TYPE_VARCHAR || PT == TYPE_STRING) {
274-
return LikeColumnPredicate<TYPE_STRING>::create_shared(
275-
filter->_opposite, column_id, filter->_fn_ctx, filter->_string_param);
275+
return LikeColumnPredicate<TYPE_STRING>::create_shared(filter->_opposite, column_id,
276+
column->name(), filter->_fn_ctx,
277+
filter->_string_param);
276278
}
277279
throw Exception(ErrorCode::INTERNAL_ERROR, "function filter do not support type {}", PT);
278280
}

be/src/olap/accept_null_predicate.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class AcceptNullPredicate : public ColumnPredicate {
4141

4242
public:
4343
AcceptNullPredicate(const std::shared_ptr<ColumnPredicate>& nested)
44-
: ColumnPredicate(nested->column_id(), nested->primitive_type(), nested->opposite()),
44+
: ColumnPredicate(nested->column_id(), nested->col_name(), nested->primitive_type(),
45+
nested->opposite()),
4546
_nested {nested} {}
4647
AcceptNullPredicate(const AcceptNullPredicate& other, uint32_t col_id)
4748
: ColumnPredicate(other, col_id),

be/src/olap/bitmap_filter_predicate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ class BitmapFilterColumnPredicate final : public ColumnPredicate {
3333
using CppType = typename PrimitiveTypeTraits<T>::CppType;
3434
using SpecificFilter = BitmapFilterFunc<T>;
3535

36-
BitmapFilterColumnPredicate(uint32_t column_id,
36+
BitmapFilterColumnPredicate(uint32_t column_id, std::string col_name,
3737
const std::shared_ptr<BitmapFilterFuncBase>& filter)
38-
: ColumnPredicate(column_id, T),
38+
: ColumnPredicate(column_id, col_name, T),
3939
_filter(filter),
4040
_specific_filter(assert_cast<SpecificFilter*>(_filter.get())) {}
4141
~BitmapFilterColumnPredicate() override = default;

be/src/olap/bloom_filter_predicate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ class BloomFilterColumnPredicate final : public ColumnPredicate {
3535
ENABLE_FACTORY_CREATOR(BloomFilterColumnPredicate);
3636
using SpecificFilter = BloomFilterFunc<T>;
3737

38-
BloomFilterColumnPredicate(uint32_t column_id,
38+
BloomFilterColumnPredicate(uint32_t column_id, std::string col_name,
3939
const std::shared_ptr<BloomFilterFuncBase>& filter)
40-
: ColumnPredicate(column_id, T),
40+
: ColumnPredicate(column_id, col_name, T),
4141
_filter(filter),
4242
_specific_filter(assert_cast<SpecificFilter*>(_filter.get())) {}
4343
~BloomFilterColumnPredicate() override = default;

be/src/olap/column_predicate.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,12 @@ struct PredicateTypeTraits {
197197

198198
class ColumnPredicate : public std::enable_shared_from_this<ColumnPredicate> {
199199
public:
200-
explicit ColumnPredicate(uint32_t column_id, PrimitiveType primitive_type,
200+
explicit ColumnPredicate(uint32_t column_id, std::string col_name, PrimitiveType primitive_type,
201201
bool opposite = false)
202-
: _column_id(column_id), _primitive_type(primitive_type), _opposite(opposite) {
202+
: _column_id(column_id),
203+
_col_name(col_name),
204+
_primitive_type(primitive_type),
205+
_opposite(opposite) {
203206
reset_judge_selectivity();
204207
}
205208
ColumnPredicate(const ColumnPredicate& other, uint32_t col_id) : ColumnPredicate(other) {
@@ -316,6 +319,7 @@ class ColumnPredicate : public std::enable_shared_from_this<ColumnPredicate> {
316319
DCHECK(false) << "should not reach here";
317320
}
318321
uint32_t column_id() const { return _column_id; }
322+
std::string col_name() const { return _col_name; }
319323

320324
bool opposite() const { return _opposite; }
321325

@@ -421,6 +425,7 @@ class ColumnPredicate : public std::enable_shared_from_this<ColumnPredicate> {
421425
}
422426

423427
uint32_t _column_id;
428+
const std::string _col_name;
424429
PrimitiveType _primitive_type;
425430
// TODO: the value is only in delete condition, better be template value
426431
bool _opposite;

be/src/olap/comparison_predicate.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ class ComparisonPredicateBase final : public ColumnPredicate {
3535
public:
3636
ENABLE_FACTORY_CREATOR(ComparisonPredicateBase);
3737
using T = typename PrimitiveTypeTraits<Type>::CppType;
38-
ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false)
39-
: ColumnPredicate(column_id, Type, opposite), _value(value) {}
38+
ComparisonPredicateBase(uint32_t column_id, std::string col_name, const T& value,
39+
bool opposite = false)
40+
: ColumnPredicate(column_id, col_name, Type, opposite), _value(value) {}
4041
ComparisonPredicateBase(const ComparisonPredicateBase<Type, PT>& other, uint32_t col_id)
4142
: ColumnPredicate(other, col_id), _value(other._value) {}
4243
ComparisonPredicateBase(const ComparisonPredicateBase<Type, PT>& other) = delete;

be/src/olap/delete_handler.cpp

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -252,42 +252,44 @@ Status convert(const vectorized::DataTypePtr& data_type, const std::list<std::st
252252
v.size = sizeof(tmp); \
253253
switch (res.condition_op) { \
254254
case PredicateType::EQ: \
255-
predicate = \
256-
create_comparison_predicate0<PredicateType::EQ>(index, type, v, true, arena); \
255+
predicate = create_comparison_predicate0<PredicateType::EQ>(index, col_name, type, v, \
256+
true, arena); \
257257
return Status::OK(); \
258258
case PredicateType::NE: \
259-
predicate = \
260-
create_comparison_predicate0<PredicateType::NE>(index, type, v, true, arena); \
259+
predicate = create_comparison_predicate0<PredicateType::NE>(index, col_name, type, v, \
260+
true, arena); \
261261
return Status::OK(); \
262262
case PredicateType::GT: \
263-
predicate = \
264-
create_comparison_predicate0<PredicateType::GT>(index, type, v, true, arena); \
263+
predicate = create_comparison_predicate0<PredicateType::GT>(index, col_name, type, v, \
264+
true, arena); \
265265
return Status::OK(); \
266266
case PredicateType::GE: \
267-
predicate = \
268-
create_comparison_predicate0<PredicateType::GE>(index, type, v, true, arena); \
267+
predicate = create_comparison_predicate0<PredicateType::GE>(index, col_name, type, v, \
268+
true, arena); \
269269
return Status::OK(); \
270270
case PredicateType::LT: \
271-
predicate = \
272-
create_comparison_predicate0<PredicateType::LT>(index, type, v, true, arena); \
271+
predicate = create_comparison_predicate0<PredicateType::LT>(index, col_name, type, v, \
272+
true, arena); \
273273
return Status::OK(); \
274274
case PredicateType::LE: \
275-
predicate = \
276-
create_comparison_predicate0<PredicateType::LE>(index, type, v, true, arena); \
275+
predicate = create_comparison_predicate0<PredicateType::LE>(index, col_name, type, v, \
276+
true, arena); \
277277
return Status::OK(); \
278278
default: \
279279
return Status::Error<ErrorCode::INVALID_ARGUMENT>( \
280280
"invalid condition operator. operator={}", type_to_op_str(res.condition_op)); \
281281
} \
282282
}
283-
Status parse_to_predicate(const uint32_t index, const vectorized::DataTypePtr& type,
283+
Status parse_to_predicate(const uint32_t index, const std::string col_name,
284+
const vectorized::DataTypePtr& type,
284285
DeleteHandler::ConditionParseResult& res, vectorized::Arena& arena,
285286
std::shared_ptr<ColumnPredicate>& predicate) {
286287
DCHECK_EQ(res.value_str.size(), 1);
287288
if (res.condition_op == PredicateType::IS_NULL ||
288289
res.condition_op == PredicateType::IS_NOT_NULL) {
289-
predicate = NullPredicate::create_shared(
290-
index, res.condition_op == PredicateType::IS_NOT_NULL, type->get_primitive_type());
290+
predicate = NullPredicate::create_shared(index, col_name,
291+
res.condition_op == PredicateType::IS_NOT_NULL,
292+
type->get_primitive_type());
291293
return Status::OK();
292294
}
293295
StringRef v;
@@ -318,28 +320,28 @@ Status parse_to_predicate(const uint32_t index, const vectorized::DataTypePtr& t
318320
RETURN_IF_ERROR(convert<TYPE_STRING>(type, res.value_str.front(), arena, v));
319321
switch (res.condition_op) {
320322
case PredicateType::EQ:
321-
predicate =
322-
create_comparison_predicate0<PredicateType::EQ>(index, type, v, true, arena);
323+
predicate = create_comparison_predicate0<PredicateType::EQ>(index, col_name, type, v,
324+
true, arena);
323325
return Status::OK();
324326
case PredicateType::NE:
325-
predicate =
326-
create_comparison_predicate0<PredicateType::NE>(index, type, v, true, arena);
327+
predicate = create_comparison_predicate0<PredicateType::NE>(index, col_name, type, v,
328+
true, arena);
327329
return Status::OK();
328330
case PredicateType::GT:
329-
predicate =
330-
create_comparison_predicate0<PredicateType::GT>(index, type, v, true, arena);
331+
predicate = create_comparison_predicate0<PredicateType::GT>(index, col_name, type, v,
332+
true, arena);
331333
return Status::OK();
332334
case PredicateType::GE:
333-
predicate =
334-
create_comparison_predicate0<PredicateType::GE>(index, type, v, true, arena);
335+
predicate = create_comparison_predicate0<PredicateType::GE>(index, col_name, type, v,
336+
true, arena);
335337
return Status::OK();
336338
case PredicateType::LT:
337-
predicate =
338-
create_comparison_predicate0<PredicateType::LT>(index, type, v, true, arena);
339+
predicate = create_comparison_predicate0<PredicateType::LT>(index, col_name, type, v,
340+
true, arena);
339341
return Status::OK();
340342
case PredicateType::LE:
341-
predicate =
342-
create_comparison_predicate0<PredicateType::LE>(index, type, v, true, arena);
343+
predicate = create_comparison_predicate0<PredicateType::LE>(index, col_name, type, v,
344+
true, arena);
343345
return Status::OK();
344346
default:
345347
return Status::Error<ErrorCode::INVALID_ARGUMENT>(
@@ -356,21 +358,24 @@ Status parse_to_predicate(const uint32_t index, const vectorized::DataTypePtr& t
356358
#undef CONVERT_CASE
357359
}
358360

359-
Status parse_to_in_predicate(const uint32_t index, const vectorized::DataTypePtr& type,
361+
Status parse_to_in_predicate(const uint32_t index, const std::string& col_name,
362+
const vectorized::DataTypePtr& type,
360363
DeleteHandler::ConditionParseResult& res, vectorized::Arena& arena,
361364
std::shared_ptr<ColumnPredicate>& predicate) {
362365
DCHECK_GT(res.value_str.size(), 1);
363366
switch (res.condition_op) {
364367
case PredicateType::IN_LIST: {
365368
std::shared_ptr<HybridSetBase> set;
366369
RETURN_IF_ERROR(convert(type, res.value_str, arena, set));
367-
predicate = create_in_list_predicate<PredicateType::IN_LIST>(index, type, set, true);
370+
predicate =
371+
create_in_list_predicate<PredicateType::IN_LIST>(index, col_name, type, set, true);
368372
break;
369373
}
370374
case PredicateType::NOT_IN_LIST: {
371375
std::shared_ptr<HybridSetBase> set;
372376
RETURN_IF_ERROR(convert(type, res.value_str, arena, set));
373-
predicate = create_in_list_predicate<PredicateType::NOT_IN_LIST>(index, type, set, true);
377+
predicate = create_in_list_predicate<PredicateType::NOT_IN_LIST>(index, col_name, type, set,
378+
true);
374379
break;
375380
}
376381
default:
@@ -741,7 +746,7 @@ Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema,
741746
const auto& column = complete_schema->column_by_uid(col_unique_id);
742747
uint32_t index = complete_schema->field_index(col_unique_id);
743748
std::shared_ptr<ColumnPredicate> predicate;
744-
RETURN_IF_ERROR(parse_to_predicate(index, column.get_vec_type(), condition,
749+
RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition,
745750
_predicate_arena, predicate));
746751
if (predicate != nullptr) {
747752
delete_conditions->column_predicate_vec.push_back(predicate);
@@ -800,8 +805,8 @@ Status DeleteHandler::init(TabletSchemaSPtr tablet_schema,
800805
const auto& column = tablet_schema->column_by_uid(col_unique_id);
801806
uint32_t index = tablet_schema->field_index(col_unique_id);
802807
std::shared_ptr<ColumnPredicate> predicate;
803-
RETURN_IF_ERROR(parse_to_in_predicate(index, column.get_vec_type(), condition,
804-
_predicate_arena, predicate));
808+
RETURN_IF_ERROR(parse_to_in_predicate(index, column.name(), column.get_vec_type(),
809+
condition, _predicate_arena, predicate));
805810
temp.column_predicate_vec.push_back(predicate);
806811
}
807812

be/src/olap/in_list_predicate.h

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -77,32 +77,10 @@ class InListPredicateBase final : public ColumnPredicate {
7777
std::is_same_v<T, StringRef>, StringSet<DynamicContainer<std::string>>,
7878
HybridSet<Type, DynamicContainer<T>,
7979
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>>;
80-
template <typename ConditionType, typename ConvertFunc>
81-
InListPredicateBase(uint32_t column_id, const ConditionType& conditions,
82-
const ConvertFunc& convert, bool is_opposite,
83-
const vectorized::DataTypePtr& data_type, vectorized::Arena& arena)
84-
: ColumnPredicate(column_id, Type, is_opposite),
85-
_min_value(type_limit<T>::max()),
86-
_max_value(type_limit<T>::min()) {
87-
_values = std::make_shared<HybridSetType>(false);
88-
for (const auto& condition : conditions) {
89-
T tmp;
90-
if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) {
91-
tmp = convert(data_type, condition, arena);
92-
} else if constexpr (Type == TYPE_DECIMAL32 || Type == TYPE_DECIMAL64 ||
93-
Type == TYPE_DECIMAL128I || Type == TYPE_DECIMAL256) {
94-
tmp = convert(data_type, condition);
95-
} else {
96-
tmp = convert(condition);
97-
}
98-
_values->insert(&tmp);
99-
_update_min_max(tmp);
100-
}
101-
}
102-
103-
InListPredicateBase(uint32_t column_id, const std::shared_ptr<HybridSetBase>& hybrid_set,
104-
bool is_opposite, size_t char_length = 0)
105-
: ColumnPredicate(column_id, Type, is_opposite),
80+
InListPredicateBase(uint32_t column_id, std::string col_name,
81+
const std::shared_ptr<HybridSetBase>& hybrid_set, bool is_opposite,
82+
size_t char_length = 0)
83+
: ColumnPredicate(column_id, col_name, Type, is_opposite),
10684
_min_value(type_limit<T>::max()),
10785
_max_value(type_limit<T>::min()) {
10886
CHECK(hybrid_set != nullptr);

be/src/olap/like_column_predicate.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
namespace doris {
2727

2828
template <PrimitiveType T>
29-
LikeColumnPredicate<T>::LikeColumnPredicate(bool opposite, uint32_t column_id,
29+
LikeColumnPredicate<T>::LikeColumnPredicate(bool opposite, uint32_t column_id, std::string col_name,
3030
doris::FunctionContext* fn_ctx, doris::StringRef val)
31-
: ColumnPredicate(column_id, T, opposite), pattern(val) {
31+
: ColumnPredicate(column_id, col_name, T, opposite), pattern(val) {
3232
static_assert(T == TYPE_VARCHAR || T == TYPE_CHAR || T == TYPE_STRING,
3333
"LikeColumnPredicate only supports the following types: TYPE_VARCHAR, TYPE_CHAR, "
3434
"TYPE_STRING");

0 commit comments

Comments
 (0)