Skip to content

Commit 384856b

Browse files
Merge pull request ClickHouse#80329 from ClickHouse/do-not-push-down-non-deterministic-function-for-steps-which-change-rows
Disable filter-push-down for the predicate with non-deterministic fun
2 parents 5f8501c + ff2914d commit 384856b

File tree

5 files changed

+52
-21
lines changed

5 files changed

+52
-21
lines changed

src/Interpreters/ActionsDAG.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2357,7 +2357,7 @@ struct ConjunctionNodes
23572357
/// Assuming predicate is a conjunction (probably, trivial).
23582358
/// Find separate conjunctions nodes. Split nodes into allowed and rejected sets.
23592359
/// Allowed predicate is a predicate which can be calculated using only nodes from the allowed_nodes set.
2360-
ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordered_set<const ActionsDAG::Node *> allowed_nodes)
2360+
ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordered_set<const ActionsDAG::Node *> allowed_nodes, bool allow_non_deterministic_functions)
23612361
{
23622362
ConjunctionNodes conjunction;
23632363
std::unordered_set<const ActionsDAG::Node *> allowed;
@@ -2428,7 +2428,13 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
24282428
{
24292429
if (cur.num_allowed_children == cur.node->children.size())
24302430
{
2431-
if (cur.node->type != ActionsDAG::ActionType::ARRAY_JOIN && cur.node->type != ActionsDAG::ActionType::INPUT)
2431+
bool is_deprecated_function = !allow_non_deterministic_functions
2432+
&& cur.node->type == ActionsDAG::ActionType::FUNCTION
2433+
&& !cur.node->function_base->isDeterministicInScopeOfQuery();
2434+
2435+
if (cur.node->type != ActionsDAG::ActionType::ARRAY_JOIN
2436+
&& cur.node->type != ActionsDAG::ActionType::INPUT
2437+
&& !is_deprecated_function)
24322438
allowed_nodes.emplace(cur.node);
24332439
}
24342440

@@ -2578,7 +2584,8 @@ std::optional<ActionsDAG> ActionsDAG::splitActionsForFilterPushDown(
25782584
const std::string & filter_name,
25792585
bool removes_filter,
25802586
const Names & available_inputs,
2581-
const ColumnsWithTypeAndName & all_inputs)
2587+
const ColumnsWithTypeAndName & all_inputs,
2588+
bool allow_non_deterministic_functions)
25822589
{
25832590
Node * predicate = const_cast<Node *>(tryFindInOutputs(filter_name));
25842591
if (!predicate)
@@ -2611,7 +2618,7 @@ std::optional<ActionsDAG> ActionsDAG::splitActionsForFilterPushDown(
26112618
}
26122619
}
26132620

2614-
auto conjunction = getConjunctionNodes(predicate, allowed_nodes);
2621+
auto conjunction = getConjunctionNodes(predicate, allowed_nodes, allow_non_deterministic_functions);
26152622

26162623
if (conjunction.allowed.empty())
26172624
return {};
@@ -2688,9 +2695,9 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu
26882695
auto right_stream_allowed_nodes = get_input_nodes(right_stream_available_columns_to_push_down);
26892696
auto both_streams_allowed_nodes = get_input_nodes(equivalent_columns_to_push_down);
26902697

2691-
auto left_stream_push_down_conjunctions = getConjunctionNodes(predicate, left_stream_allowed_nodes);
2692-
auto right_stream_push_down_conjunctions = getConjunctionNodes(predicate, right_stream_allowed_nodes);
2693-
auto both_streams_push_down_conjunctions = getConjunctionNodes(predicate, both_streams_allowed_nodes);
2698+
auto left_stream_push_down_conjunctions = getConjunctionNodes(predicate, left_stream_allowed_nodes, false);
2699+
auto right_stream_push_down_conjunctions = getConjunctionNodes(predicate, right_stream_allowed_nodes, false);
2700+
auto both_streams_push_down_conjunctions = getConjunctionNodes(predicate, both_streams_allowed_nodes, false);
26942701

26952702
NodeRawConstPtrs left_stream_allowed_conjunctions = std::move(left_stream_push_down_conjunctions.allowed);
26962703
NodeRawConstPtrs right_stream_allowed_conjunctions = std::move(right_stream_push_down_conjunctions.allowed);

src/Interpreters/ActionsDAG.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,8 @@ class ActionsDAG
390390
const std::string & filter_name,
391391
bool removes_filter,
392392
const Names & available_inputs,
393-
const ColumnsWithTypeAndName & all_inputs);
393+
const ColumnsWithTypeAndName & all_inputs,
394+
bool allow_non_deterministic_functions);
394395

395396
struct ActionsForJOINFilterPushDown;
396397

src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node)
104104
return res;
105105
}
106106

107-
static std::optional<ActionsDAG> splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0)
107+
static std::optional<ActionsDAG> splitFilter(QueryPlan::Node * parent_node, bool step_changes_the_number_of_rows, const Names & available_inputs, size_t child_idx = 0)
108108
{
109109
QueryPlan::Node * child_node = parent_node->children.front();
110110
checkChildrenSize(child_node, child_idx + 1);
@@ -118,7 +118,8 @@ static std::optional<ActionsDAG> splitFilter(QueryPlan::Node * parent_node, cons
118118
bool removes_filter = filter->removesFilterColumn();
119119

120120
const auto & all_inputs = child->getInputHeaders()[child_idx].getColumnsWithTypeAndName();
121-
return expression.splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs);
121+
bool allow_deterministic_functions = !step_changes_the_number_of_rows;
122+
return expression.splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs, allow_deterministic_functions);
122123
}
123124

124125
static size_t
@@ -186,23 +187,23 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes,
186187
}
187188

188189
static size_t
189-
tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const Names & allowed_inputs,
190+
tryAddNewFilterStep(QueryPlan::Node * parent_node, bool step_changes_the_number_of_rows, QueryPlan::Nodes & nodes, const Names & allowed_inputs,
190191
bool can_remove_filter = true, size_t child_idx = 0)
191192
{
192-
if (auto split_filter = splitFilter(parent_node, allowed_inputs, child_idx))
193+
if (auto split_filter = splitFilter(parent_node, step_changes_the_number_of_rows, allowed_inputs, child_idx))
193194
return addNewFilterStepOrThrow(parent_node, nodes, std::move(*split_filter), can_remove_filter, child_idx);
194195
return 0;
195196
}
196197

197198

198199
/// Push down filter through specified type of step
199200
template <typename Step>
200-
static size_t simplePushDownOverStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, QueryPlanStepPtr & child)
201+
static size_t simplePushDownOverStep(QueryPlan::Node * parent_node, bool step_changes_the_number_of_rows, QueryPlan::Nodes & nodes, QueryPlanStepPtr & child)
201202
{
202203
if (typeid_cast<Step *>(child.get()))
203204
{
204205
Names allowed_inputs = child->getOutputHeader().getNames();
205-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs))
206+
if (auto updated_steps = tryAddNewFilterStep(parent_node, step_changes_the_number_of_rows, nodes, allowed_inputs))
206207
return updated_steps;
207208
}
208209
return 0;
@@ -507,7 +508,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
507508
bool filter_is_not_among_aggregates_arguments = merging_aggregated || filterColumnIsNotAmongAggregatesArguments(params.aggregates, filter->getFilterColumnName());
508509
const bool can_remove_filter = filter_column_is_not_among_aggregation_keys && filter_is_not_among_aggregates_arguments;
509510

510-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys, can_remove_filter))
511+
if (auto updated_steps = tryAddNewFilterStep(parent_node, true, nodes, keys, can_remove_filter))
511512
return updated_steps;
512513
}
513514

@@ -568,7 +569,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
568569
/// ) group by y with totals) where y != 2`
569570
/// Optimization will replace totals row `y, sum(x)` from `(0, 45)` to `(0, 37)`.
570571
/// It is expected to ok, cause AST optimization `enable_optimize_predicate_expression = 1` also brakes it.
571-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys))
572+
if (auto updated_steps = tryAddNewFilterStep(parent_node, false, nodes, keys))
572573
return updated_steps;
573574
}
574575

@@ -584,11 +585,11 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
584585
if (!keys_set.contains(column.name))
585586
allowed_inputs.push_back(column.name);
586587

587-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs))
588+
if (auto updated_steps = tryAddNewFilterStep(parent_node, true, nodes, allowed_inputs))
588589
return updated_steps;
589590
}
590591

591-
if (auto updated_steps = simplePushDownOverStep<DistinctStep>(parent_node, nodes, child))
592+
if (auto updated_steps = simplePushDownOverStep<DistinctStep>(parent_node, true, nodes, child))
592593
return updated_steps;
593594

594595
if (auto updated_steps = tryPushDownOverJoinStep(parent_node, nodes, child))
@@ -619,14 +620,14 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
619620
bool can_remove_filter = sort_description_it == sort_description.end();
620621

621622
Names allowed_inputs = child->getOutputHeader().getNames();
622-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, can_remove_filter))
623+
if (auto updated_steps = tryAddNewFilterStep(parent_node, false, nodes, allowed_inputs, can_remove_filter))
623624
return updated_steps;
624625
}
625626

626627
if (typeid_cast<CustomMetricLogViewStep *>(child.get()))
627628
{
628629
Names allowed_inputs = {"event_date", "event_time", "hostname"};
629-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, true))
630+
if (auto updated_steps = tryAddNewFilterStep(parent_node, true, nodes, allowed_inputs, true))
630631
return updated_steps;
631632
}
632633

@@ -636,7 +637,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
636637
bool can_remove_filter = !join_filter_set_step->isColumnPartOfSetKey(filter_column_name);
637638

638639
Names allowed_inputs = child->getOutputHeader().getNames();
639-
if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, can_remove_filter))
640+
if (auto updated_steps = tryAddNewFilterStep(parent_node, false, nodes, allowed_inputs, can_remove_filter))
640641
return updated_steps;
641642
}
642643

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
drop table if exists users_items;
2+
CREATE TABLE users_items (user_id UInt64) ENGINE = Log;
3+
INSERT INTO users_items SELECT bitAnd(number, 15) from numbers(64);
4+
5+
SELECT sum(in_sample)
6+
FROM
7+
(
8+
WITH RandomUsers AS
9+
(
10+
SELECT
11+
user_id,
12+
rand() % 2 AS in_sample
13+
FROM users_items
14+
GROUP BY user_id
15+
)
16+
SELECT
17+
user_id,
18+
in_sample
19+
FROM RandomUsers
20+
WHERE in_sample = 0
21+
);

0 commit comments

Comments
 (0)