Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions be/src/olap/block_column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "olap/column_predicate.h"
#include "olap/olap_common.h"
#include "vec/columns/column.h"
#include "vec/exec/format/parquet/parquet_pred_cmp.h"

namespace roaring {
class Roaring;
Expand Down Expand Up @@ -79,6 +80,10 @@ class BlockColumnPredicate {
throw Exception(Status::FatalError("should not reach here"));
}

// Parquet column-statistics overload of evaluate_and.
// This base implementation never returns: it unconditionally throws an Exception
// wrapping Status::FatalError. Subclasses that can work with Parquet statistics
// override it.
virtual bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const {
throw Exception(Status::FatalError("should not reach here"));
}

// Bloom-filter overload of evaluate_and.
// This base implementation never returns: it unconditionally throws an Exception
// wrapping Status::FatalError, so it is only usable through an override.
virtual bool evaluate_and(const segment_v2::BloomFilter* bf) const {
throw Exception(Status::FatalError("should not reach here"));
}
Expand Down Expand Up @@ -117,6 +122,9 @@ class SingleColumnBlockPredicate : public BlockColumnPredicate {
bool* flags) const override;
// Forwards the zone-map capability query to the wrapped column predicate.
bool support_zonemap() const override { return _predicate->support_zonemap(); }
bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override;
bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const override {
    // A single-column block predicate has nothing to combine: the answer is whatever
    // the wrapped column predicate says about these Parquet statistics.
    const bool wrapped_result = _predicate->evaluate_and(statistic);
    return wrapped_result;
}
bool evaluate_and(const segment_v2::BloomFilter* bf) const override;
bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size,
Expand Down Expand Up @@ -180,6 +188,18 @@ class OrBlockColumnPredicate : public MutilColumnBlockPredicate {
bool* flags) const override;
void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size,
bool* flags) const override;
bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const override {
    // OR semantics: the group accepts the statistics as soon as one child accepts them.
    // A group with exactly one child therefore yields that child's result, and a group
    // whose children all reject (or that has no children) yields false.
    for (int idx = 0; idx < num_of_column_predicate(); ++idx) {
        const bool child_accepts = _block_column_predicate_vec[idx]->evaluate_and(statistic);
        if (child_accepts) {
            return true;
        }
    }
    return false;
}

// note(wb): we didn't implement the evaluate_vec method here, because the storage layer only supports AND predicates for now.
};
Expand All @@ -203,6 +223,15 @@ class AndBlockColumnPredicate : public MutilColumnBlockPredicate {

bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;

bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const override {
    // AND semantics: walk the children in order and stop at the first one that rejects
    // the statistics. Reaching the end means every child accepted (vacuously true when
    // there are no children).
    auto child = _block_column_predicate_vec.begin();
    const auto last = _block_column_predicate_vec.end();
    while (child != last && (*child)->evaluate_and(statistic)) {
        ++child;
    }
    return child == last;
}

bool can_do_bloom_filter(bool ngram) const override {
for (auto& pred : _block_column_predicate_vec) {
if (!pred->can_do_bloom_filter(ngram)) {
Expand Down
10 changes: 10 additions & 0 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "util/defer_op.h"
#include "util/runtime_profile.h"
#include "vec/columns/column.h"
#include "vec/exec/format/parquet/parquet_pred_cmp.h"
#include "vec/exprs/vruntimefilter_wrapper.h"

using namespace doris::segment_v2;
Expand Down Expand Up @@ -222,6 +223,15 @@ class ColumnPredicate {

// Base implementation: always answers false, regardless of `ngram`.
virtual bool can_do_bloom_filter(bool ngram) const { return false; }

/**
 * Evaluate this predicate against Parquet column statistics, to figure out whether
 * the data they describe can be matched (partially or completely) by the predicate.
 *
 * This base implementation does not support Parquet statistics: it never returns
 * and always throws an Exception with ErrorCode::INTERNAL_ERROR. Predicates that
 * can reason about the statistics must override it.
 *
 * @param statistic Parquet column statistics; unused by the base implementation.
 * @throws Exception always.
 */
virtual bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const {
    throw Exception(ErrorCode::INTERNAL_ERROR,
                    "ParquetPredicate is not supported by this predicate!");
}

// used to evaluate pre read column in lazy materialization
// now only support integer/float
// a vectorized eval way
Expand Down
47 changes: 47 additions & 0 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,53 @@ class ComparisonPredicateBase : public ColumnPredicate {
}
}

/**
 * Decide from Parquet min/max statistics whether this comparison could be satisfied
 * by any value in the range described by `statistic`.
 *
 * Returns true ("keep the data and evaluate each value") when:
 *   - the statistics callback returns false, or the encoded min/max cannot be decoded
 *     (nothing can be proven, so the data has to be read);
 *   - EQ: min <= `_value` <= max;
 *   - NE: min or max differs from `_value` (min == max == `_value` would mean that
 *         every value covered by the statistics equals `_value`);
 *   - LT: min <  `_value`;   LE: min <= `_value`;
 *   - GT: max >  `_value`;   GE: max >= `_value`.
 * Returns false only when the statistics rule out every value.
 *
 * @param statistic Parquet column statistics plus the callback used to load them.
 */
bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const override {
    if (!(*statistic->get_stat_func)(statistic, column_id())) {
        return true;
    }
    vectorized::Field min_field;
    vectorized::Field max_field;
    if (!vectorized::ParquetPredicate::get_min_max_value(
                 statistic->col_schema, statistic->encoded_min_value,
                 statistic->encoded_max_value, *statistic->ctz, &min_field, &max_field)
                 .ok()) {
        return true;
    }

    using CppType = typename PrimitiveTypeTraits<Type>::CppType;
    T min_value;
    T max_value;
    if constexpr (is_int_or_bool(Type) || is_float_or_double(Type)) {
        // Numeric fields are stored as their nearest (widest) field type and have to
        // be narrowed back to the column's C++ type.
        using NearestFieldType = typename PrimitiveTypeTraits<Type>::NearestFieldType;
        min_value = static_cast<CppType>(min_field.template get<NearestFieldType>());
        max_value = static_cast<CppType>(max_field.template get<NearestFieldType>());
    } else {
        min_value = min_field.template get<CppType>();
        max_value = max_field.template get<CppType>();
    }

    if constexpr (PT == PredicateType::EQ) {
        return Compare::less_equal(min_value, _value) &&
               Compare::greater_equal(max_value, _value);
    } else if constexpr (PT == PredicateType::NE) {
        return !Compare::equal(min_value, _value) || !Compare::equal(max_value, _value);
    } else if constexpr (PT == PredicateType::LT) {
        // Strict bound: when min >= `_value`, no value can be `< _value`.
        // Written as a negation so that an unordered comparison (e.g. NaN) keeps the data.
        return !Compare::greater_equal(min_value, _value);
    } else if constexpr (PT == PredicateType::LE) {
        return Compare::less_equal(min_value, _value);
    } else if constexpr (PT == PredicateType::GT) {
        // Strict bound: when max <= `_value`, no value can be `> _value`.
        return !Compare::less_equal(max_value, _value);
    } else {
        static_assert(PT == PredicateType::GE);
        return Compare::greater_equal(max_value, _value);
    }
}

bool is_always_true(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
if (statistic.first->is_null() || statistic.second->is_null()) {
return false;
Expand Down
7 changes: 4 additions & 3 deletions be/src/olap/delete_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,8 @@ Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema,
condition.__set_column_unique_id(col_unique_id);
const auto& column = complete_schema->column_by_uid(col_unique_id);
uint32_t index = complete_schema->field_index(col_unique_id);
auto* predicate = parse_to_predicate(column, index, condition, _predicate_arena, true);
auto* predicate =
parse_to_predicate(column.get_vec_type(), index, condition, _predicate_arena, true);
if (predicate != nullptr) {
delete_conditions->column_predicate_vec.push_back(predicate);
}
Expand Down Expand Up @@ -440,8 +441,8 @@ Status DeleteHandler::init(TabletSchemaSPtr tablet_schema,
}
const auto& column = tablet_schema->column_by_uid(col_unique_id);
uint32_t index = tablet_schema->field_index(col_unique_id);
temp.column_predicate_vec.push_back(
parse_to_predicate(column, index, condition, _predicate_arena, true));
temp.column_predicate_vec.push_back(parse_to_predicate(
column.get_vec_type(), index, condition, _predicate_arena, true));
}

_del_conds.emplace_back(std::move(temp));
Expand Down
73 changes: 56 additions & 17 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "vec/columns/column_dictionary.h"
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"

// for uint24_t
template <>
Expand Down Expand Up @@ -67,19 +68,19 @@ class InListPredicateBase : public ColumnPredicate {
using T = typename PrimitiveTypeTraits<Type>::CppType;
template <typename ConditionType, typename ConvertFunc>
InListPredicateBase(uint32_t column_id, const ConditionType& conditions,
const ConvertFunc& convert, bool is_opposite, const TabletColumn* col,
vectorized::Arena& arena)
const ConvertFunc& convert, bool is_opposite,
const vectorized::DataTypePtr& data_type, vectorized::Arena& arena)
: ColumnPredicate(column_id, is_opposite),
_min_value(type_limit<T>::max()),
_max_value(type_limit<T>::min()) {
_values = std::make_shared<HybridSetType>(false);
for (const auto& condition : conditions) {
T tmp;
if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) {
tmp = convert(*col, condition, arena);
tmp = convert(data_type, condition, arena);
} else if constexpr (Type == TYPE_DECIMAL32 || Type == TYPE_DECIMAL64 ||
Type == TYPE_DECIMAL128I || Type == TYPE_DECIMAL256) {
tmp = convert(*col, condition);
tmp = convert(data_type, condition);
} else {
tmp = convert(condition);
}
Expand Down Expand Up @@ -245,6 +246,42 @@ class InListPredicateBase : public ColumnPredicate {
}
}

/**
 * Decide from Parquet min/max statistics whether this IN / NOT IN predicate could be
 * satisfied by any value in the range described by `statistic`.
 *
 * Returns true ("keep the data and evaluate each value") when:
 *   - the statistics callback returns false, or the encoded min/max cannot be decoded;
 *   - IN_LIST: the statistics range [min, max] overlaps the range
 *     [`_min_value`, `_max_value`] spanned by the list values;
 *   - any other predicate type (nothing is pruned for it here).
 * Returns false only for IN_LIST when the two ranges are disjoint.
 *
 * @param statistic Parquet column statistics plus the callback used to load them.
 */
bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const override {
    if (!(*statistic->get_stat_func)(statistic, column_id())) {
        return true;
    }
    vectorized::Field min_field;
    vectorized::Field max_field;
    if (!vectorized::ParquetPredicate::get_min_max_value(
                 statistic->col_schema, statistic->encoded_min_value,
                 statistic->encoded_max_value, *statistic->ctz, &min_field, &max_field)
                 .ok()) {
        return true;
    }

    using CppType = typename PrimitiveTypeTraits<Type>::CppType;
    T min_value;
    T max_value;
    if constexpr (is_int_or_bool(Type) || is_float_or_double(Type)) {
        // Numeric fields are stored as their nearest (widest) field type and have to
        // be narrowed back to the column's C++ type.
        using NearestFieldType = typename PrimitiveTypeTraits<Type>::NearestFieldType;
        min_value = static_cast<CppType>(min_field.template get<NearestFieldType>());
        max_value = static_cast<CppType>(max_field.template get<NearestFieldType>());
    } else {
        min_value = min_field.template get<CppType>();
        max_value = max_field.template get<CppType>();
    }

    if constexpr (PT == PredicateType::IN_LIST) {
        // Two closed ranges overlap iff each one starts no later than the other ends.
        return Compare::less_equal(min_value, _max_value) &&
               Compare::greater_equal(max_value, _min_value);
    } else {
        return true;
    }
}

bool evaluate_and(const StringRef* dict_words, const size_t count) const override {
for (size_t i = 0; i != count; ++i) {
const auto found = _values->find(dict_words[i].data, dict_words[i].size) ^ _opposite;
Expand Down Expand Up @@ -548,57 +585,59 @@ template <PrimitiveType Type, PredicateType PT, typename ConditionType, typename
size_t N = 0>
// Allocates an InListPredicateBase whose value-set type depends on N:
//   - 1 <= N <= FIXED_CONTAINER_MAX_SIZE: the set is backed by FixedContainer<..., N>;
//   - otherwise (including the default N = 0): the set is backed by DynamicContainer.
// When T is StringRef the set is a StringSet holding std::string; every other T uses a
// HybridSet. The predicate is created with `new` and returned as a raw pointer
// (presumably owned by the caller -- TODO confirm).
// NOTE(review): this listing shows both a `col` and a `data_type` form of the trailing
// parameters/arguments; confirm which one the surrounding file actually uses.
ColumnPredicate* _create_in_list_predicate(uint32_t column_id, const ConditionType& conditions,
const ConvertFunc& convert, bool is_opposite,
const TabletColumn* col, vectorized::Arena& arena) {
const vectorized::DataTypePtr& data_type,
vectorized::Arena& arena) {
using T = typename PrimitiveTypeTraits<Type>::CppType;
if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
using Set = std::conditional_t<
std::is_same_v<T, StringRef>, StringSet<FixedContainer<std::string, N>>,
HybridSet<Type, FixedContainer<T, N>,
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>;
return new InListPredicateBase<Type, PT, Set>(column_id, conditions, convert, is_opposite,
col, arena);
data_type, arena);
} else {
using Set = std::conditional_t<
std::is_same_v<T, StringRef>, StringSet<DynamicContainer<std::string>>,
HybridSet<Type, DynamicContainer<T>,
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>;
return new InListPredicateBase<Type, PT, Set>(column_id, conditions, convert, is_opposite,
col, arena);
data_type, arena);
}
}

// Runtime-to-compile-time dispatch on the number of IN-list conditions.
// Lists of exactly 1..7 elements, or of exactly FIXED_CONTAINER_MAX_SIZE elements, are
// forwarded to _create_in_list_predicate with that count as the N template argument;
// every other size is forwarded without N, i.e. with its default value.
// Returns whatever _create_in_list_predicate returns.
// NOTE(review): this listing shows both a `col` and a `data_type` form of the trailing
// parameters/arguments; confirm which one the surrounding file actually uses.
template <PrimitiveType Type, PredicateType PT, typename ConditionType, typename ConvertFunc>
ColumnPredicate* create_in_list_predicate(uint32_t column_id, const ConditionType& conditions,
const ConvertFunc& convert, bool is_opposite,
const TabletColumn* col, vectorized::Arena& arena) {
const vectorized::DataTypePtr& data_type,
vectorized::Arena& arena) {
if (conditions.size() == 1) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 1>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == 2) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 2>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == 3) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 3>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == 4) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 4>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == 5) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 5>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == 6) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 6>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == 7) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 7>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
} else if (conditions.size() == FIXED_CONTAINER_MAX_SIZE) {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc,
FIXED_CONTAINER_MAX_SIZE>(column_id, conditions, convert,
is_opposite, col, arena);
is_opposite, data_type, arena);
} else {
return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc>(
column_id, conditions, convert, is_opposite, col, arena);
column_id, conditions, convert, is_opposite, data_type, arena);
}
}

Expand Down
11 changes: 11 additions & 0 deletions be/src/olap/null_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ class NullPredicate : public ColumnPredicate {
}
}

bool evaluate_and(vectorized::ParquetPredicate::ColumnStat* statistic) const override {
    // When the statistics callback returns false nothing can be ruled out.
    if (!(*statistic->get_stat_func)(statistic, column_id())) {
        return true;
    }
    // `_is_null` set: always keep the data.
    // `_is_null` unset: keep the data unless the statistics say every value is null.
    return _is_null || !statistic->is_all_null;
}

bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
// evaluate_del only use for delete condition to filter page, need use delete condition origin value,
// when opposite==true, origin value 'is null'->'is not null' and 'is not null'->'is null',
Expand Down
Loading
Loading