Skip to content

Commit b9f0bc5

Browse files
Merge pull request ClickHouse#80360 from ClickHouse/logical-join-predicate-push-down-pre-filter
Fix predicate-push-down for the logical JOIN step if the pre-join exp…
2 parents 76957a2 + ed50f21 commit b9f0bc5

File tree

5 files changed

+68
-2
lines changed

5 files changed

+68
-2
lines changed

src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,33 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
387387

388388
size_t updated_steps = 0;
389389

390+
/// For the logical join step, we need to merge the pre-join actions into the filter DAG.
391+
/// TODO: this should be refactored and replaced with optimizations which
392+
/// 1. push filter/expression into JOIN (as post-filter)
393+
/// 2. move filter within JOIN step, potentially changing JoinKind
394+
/// 3. push filter/expression out of JOIN (from pre-filter)
395+
auto fix_predicate_for_join_logical_step = [&](ActionsDAG filter_dag, const ActionsDAG & side_dag)
396+
{
397+
filter_dag = ActionsDAG::merge(side_dag.clone(), std::move(filter_dag));
398+
auto & outputs = filter_dag.getOutputs();
399+
outputs.resize(1);
400+
outputs.insert(outputs.end(), filter_dag.getInputs().begin(), filter_dag.getInputs().end());
401+
filter_dag.removeUnusedActions();
402+
return filter_dag;
403+
};
404+
390405
if (join_filter_push_down_actions.left_stream_filter_to_push_down)
391406
{
407+
if (logical_join)
408+
{
409+
410+
join_filter_push_down_actions.left_stream_filter_to_push_down = fix_predicate_for_join_logical_step(
411+
std::move(*join_filter_push_down_actions.left_stream_filter_to_push_down),
412+
*logical_join->getExpressionActions().left_pre_join_actions
413+
);
414+
join_filter_push_down_actions.left_stream_filter_removes_filter = true;
415+
}
416+
392417
const auto & result_name = join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name;
393418
updated_steps += addNewFilterStepOrThrow(parent_node,
394419
nodes,
@@ -404,6 +429,16 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
404429

405430
if (join_filter_push_down_actions.right_stream_filter_to_push_down && allow_push_down_to_right)
406431
{
432+
if (logical_join)
433+
{
434+
435+
join_filter_push_down_actions.right_stream_filter_to_push_down = fix_predicate_for_join_logical_step(
436+
std::move(*join_filter_push_down_actions.right_stream_filter_to_push_down),
437+
*logical_join->getExpressionActions().right_pre_join_actions
438+
);
439+
join_filter_push_down_actions.right_stream_filter_removes_filter = true;
440+
}
441+
407442
const auto & result_name = join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name;
408443
updated_steps += addNewFilterStepOrThrow(parent_node,
409444
nodes,
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
1
2+
1

tests/queries/0_stateless/03393_join_bug_77848.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ SET enable_analyzer = 1;
1313
-- TODO(@vdimir): NOT_FOUND_COLUMN_IN_BLOCK is a bug, should be fixed
1414
-- This test ensures that the query at least does not crash
1515

16-
SELECT 1 FROM BadTable i LEFT JOIN BadJoin c ON i.id_uint = toUInt128(c.id) WHERE equals(i.id_uint, 12); -- { serverError NOT_FOUND_COLUMN_IN_BLOCK}
16+
SELECT 1 FROM BadTable i LEFT JOIN BadJoin c ON i.id_uint = toUInt128(c.id) WHERE equals(i.id_uint, 12);
1717

18-
SELECT equals(i.id_uint, 12) FROM BadTable i LEFT JOIN BadJoin c ON i.id_uint = toUInt128(c.id) WHERE equals(i.id_uint, 12); -- { serverError NOT_FOUND_COLUMN_IN_BLOCK}
18+
SELECT equals(i.id_uint, 12) FROM BadTable i LEFT JOIN BadJoin c ON i.id_uint = toUInt128(c.id) WHERE equals(i.id_uint, 12);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2025-04-01
2+
2025-04-01
3+
2025-04-01
4+
2025-04-01
5+
2025-04-01
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
drop table if exists td;
2+
drop table if exists tdt;
3+
4+
CREATE TABLE td (id Int16, d Date) ENGINE=MergeTree() order by id;
5+
CREATE TABLE tdt (id Int16, dt DateTime) ENGINE=MergeTree() order by id;
6+
7+
insert into td values (1,'2025-03-01'),(2,'2025-04-01');
8+
insert into tdt values (1,'2025-03-01 01:01:01'),(2,'2025-03-01 02:01:01'),(3,'2025-04-01 03:01:01'),(4,'2025-04-01 04:01:01'),(5,'2025-04-01 05:01:01');
9+
10+
SELECT td_d FROM (SELECT t.id td_id, t.d td_d, uniqExact(tdt.id) as cnt FROM td as t LEFT JOIN tdt ON toDate(tdt.dt) = t.d GROUP BY td_id, td_d) WHERE td_d = '2025-04-01';
11+
SELECT td_d FROM (SELECT t.id td_id, t.d td_d, uniqExact(tdt.id) as cnt FROM tdt RIGHT JOIN td as t ON toDate(tdt.dt) = t.d GROUP BY td_id, td_d) WHERE td_d = '2025-04-01';
12+
13+
SELECT td_d FROM (SELECT t.id td_id, t.d td_d, uniqExact(tdt.id) as cnt FROM td as t INNER JOIN tdt ON toDate(tdt.dt) = t.d GROUP BY td_id, td_d) WHERE td_d = '2025-04-01';
14+
SELECT td_d FROM (SELECT t.id td_id, t.d td_d, uniqExact(tdt.id) as cnt FROM tdt INNER JOIN td as t ON toDate(tdt.dt) = t.d GROUP BY td_id, td_d) WHERE td_d = '2025-04-01';
15+
16+
CREATE VIEW v AS
17+
SELECT
18+
t.id td_id, t.d td_d, uniqExact(tdt.id) as cnt
19+
FROM
20+
td as t
21+
LEFT JOIN tdt ON toDate(tdt.dt) = t.d
22+
GROUP BY td_id, td_d;
23+
24+
SELECT td_d FROM v WHERE td_d = '2025-04-01';

0 commit comments

Comments
 (0)