Skip to content

Commit 4169897

Browse files
Merge pull request ClickHouse#80025 from ClickHouse/fix-lambdas-in-skip-indexes
Fix lambdas in skip index analysis.
2 parents c86415e + a3f2368 commit 4169897

File tree

4 files changed

+169
-14
lines changed

4 files changed

+169
-14
lines changed

src/Functions/FunctionsMiscellaneous.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ class ExecutableFunctionCapture : public IExecutableFunction
199199
}
200200
}
201201

202+
/// Read-only access to the expression actions stored in this object.
const ExpressionActionsPtr & getActions() const
{
    return expression_actions;
}
203+
/// Read-only access to the lambda capture description stored in this object.
const LambdaCapturePtr & getCapture() const
{
    return capture;
}
204+
202205
private:
203206
ExpressionActionsPtr expression_actions;
204207
LambdaCapturePtr capture;

src/Storages/MergeTree/RPNBuilder.cpp

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <Storages/MergeTree/RPNBuilder.h>
22

33
#include <Common/FieldVisitorToString.h>
4+
#include <Storages/MergeTree/KeyCondition.h>
45
#include <Core/Settings.h>
56

67
#include <Parsers/ASTLiteral.h>
@@ -18,6 +19,7 @@
1819
#include <Functions/indexHint.h>
1920
#include <Functions/IFunction.h>
2021
#include <Functions/IFunctionAdaptors.h>
22+
#include <Functions/FunctionsMiscellaneous.h>
2123

2224
#include <Interpreters/Context.h>
2325

@@ -41,7 +43,7 @@ namespace ErrorCodes
4143
namespace
4244
{
4345

44-
void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool allow_experimental_analyzer, bool legacy = false)
46+
void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, const ContextPtr & context, bool use_analyzer, bool legacy = false)
4547
{
4648
switch (node.type)
4749
{
@@ -53,37 +55,72 @@ void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & o
5355
/// If it was created from ASTLiteral, then result_name can be an alias.
5456
/// We need to convert value back to string here.
5557
const auto * column_const = typeid_cast<const ColumnConst *>(node.column.get());
56-
if (column_const && !allow_experimental_analyzer)
58+
if (column_const && !use_analyzer)
5759
writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out);
5860
else
5961
writeString(node.result_name, out);
6062
break;
6163
}
6264
case ActionsDAG::ActionType::ALIAS:
63-
appendColumnNameWithoutAlias(*node.children.front(), out, allow_experimental_analyzer, legacy);
65+
appendColumnNameWithoutAlias(*node.children.front(), out, context, use_analyzer, legacy);
6466
break;
6567
case ActionsDAG::ActionType::ARRAY_JOIN:
6668
writeCString("arrayJoin(", out);
67-
appendColumnNameWithoutAlias(*node.children.front(), out, allow_experimental_analyzer, legacy);
69+
appendColumnNameWithoutAlias(*node.children.front(), out, context, use_analyzer, legacy);
6870
writeChar(')', out);
6971
break;
7072
case ActionsDAG::ActionType::FUNCTION:
7173
{
72-
auto name = node.function_base->getName();
73-
if (legacy && name == "modulo")
74-
writeCString("moduloLegacy", out);
74+
if (const auto * func_capture = typeid_cast<const ExecutableFunctionCapture *>(node.function.get()))
75+
{
76+
const auto & capture = func_capture->getCapture();
77+
auto capture_dag = func_capture->getActions()->getActionsDAG().clone();
78+
if (!node.children.empty())
79+
{
80+
auto captured_columns_dag = ActionsDAG::cloneSubDAG(node.children, false);
81+
auto & outputs = captured_columns_dag.getOutputs();
82+
for (size_t i = 0; i < capture->captured_names.size(); ++i)
83+
outputs[i] = &captured_columns_dag.addAlias(*outputs[i], capture->captured_names[i]);
84+
85+
capture_dag = ActionsDAG::merge(std::move(captured_columns_dag), std::move(capture_dag));
86+
}
87+
88+
writeString("lambda(tuple(", out);
89+
bool first = true;
90+
for (const auto & arg : capture->lambda_arguments)
91+
{
92+
if (!first)
93+
writeCString(", ", out);
94+
first = false;
95+
96+
writeString(arg.name, out);
97+
}
98+
writeString("), ", out);
99+
100+
ActionsDAGWithInversionPushDown inverted_capture_dag(capture_dag.getOutputs().at(0), context);
101+
appendColumnNameWithoutAlias(*inverted_capture_dag.predicate, out, context, use_analyzer, legacy);
102+
writeChar(')', out);
103+
break;
104+
}
75105
else
76-
writeString(name, out);
106+
{
107+
auto name = node.function_base->getName();
108+
if (legacy && name == "modulo")
109+
writeCString("moduloLegacy", out);
110+
else
111+
writeString(name, out);
112+
113+
writeChar('(', out);
114+
}
77115

78-
writeChar('(', out);
79116
bool first = true;
80117
for (const auto * arg : node.children)
81118
{
82119
if (!first)
83120
writeCString(", ", out);
84121
first = false;
85122

86-
appendColumnNameWithoutAlias(*arg, out, allow_experimental_analyzer, legacy);
123+
appendColumnNameWithoutAlias(*arg, out, context, use_analyzer, legacy);
87124
}
88125
writeChar(')', out);
89126
break;
@@ -94,10 +131,10 @@ void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & o
94131
}
95132
}
96133

97-
String getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool allow_experimental_analyzer, bool legacy = false)
134+
/// Convenience wrapper around appendColumnNameWithoutAlias: renders the name of `node`
/// into an in-memory string buffer and returns the resulting string.
/// `context`, `use_analyzer` and `legacy` are forwarded to appendColumnNameWithoutAlias unchanged.
String getColumnNameWithoutAlias(const ActionsDAG::Node & node, const ContextPtr & context, bool use_analyzer, bool legacy = false)
98135
{
99136
WriteBufferFromOwnString out;
100-
appendColumnNameWithoutAlias(node, out, allow_experimental_analyzer, legacy);
137+
appendColumnNameWithoutAlias(node, out, context, use_analyzer, legacy);
101138

102139
/// NOTE(review): std::move presumably relies on out.str() returning a reference to the buffer's own string - confirm.
return std::move(out.str());
103140
}
@@ -148,7 +185,7 @@ std::string RPNBuilderTreeNode::getColumnName() const
148185
if (ast_node)
149186
return ast_node->getColumnNameWithoutAlias();
150187

151-
return getColumnNameWithoutAlias(*dag_node, getTreeContext().getSettings()[Setting::allow_experimental_analyzer]);
188+
return getColumnNameWithoutAlias(*dag_node, getTreeContext().getQueryContext(), getTreeContext().getSettings()[Setting::allow_experimental_analyzer]);
152189
}
153190

154191
std::string RPNBuilderTreeNode::getColumnNameWithModuloLegacy() const
@@ -160,7 +197,7 @@ std::string RPNBuilderTreeNode::getColumnNameWithModuloLegacy() const
160197
return adjusted_ast->getColumnNameWithoutAlias();
161198
}
162199

163-
return getColumnNameWithoutAlias(*dag_node, getTreeContext().getSettings()[Setting::allow_experimental_analyzer], true /*legacy*/);
200+
return getColumnNameWithoutAlias(*dag_node, getTreeContext().getQueryContext(), getTreeContext().getSettings()[Setting::allow_experimental_analyzer], true /*legacy*/);
164201
}
165202

166203
bool RPNBuilderTreeNode::isFunction() const
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
-- { echo On }
2+
3+
EXPLAIN indexes = 1, description=0
4+
SELECT arr
5+
FROM index_test
6+
WHERE has(arrayMap(x -> lower(x), arr), lower('a_12'))
7+
SETTINGS enable_analyzer = 1;
8+
Expression
9+
Filter
10+
ReadFromMergeTree
11+
Indexes:
12+
Skip
13+
Name: array_index
14+
Description: bloom_filter GRANULARITY 1
15+
Parts: 1/1
16+
Granules: 1/4
17+
SELECT arr
18+
FROM index_test
19+
WHERE has(arrayMap(x -> lower(x), arr), lower('a_12'))
20+
SETTINGS enable_analyzer = 1;
21+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12']
22+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12','A_13']
23+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12','A_13','A_14']
24+
EXPLAIN indexes = 1, description=0
25+
SELECT arr
26+
FROM index_test
27+
WHERE has(arrayMap((x, y) -> concat(lower(x), y), arr, arr), 'a_12A_12')
28+
SETTINGS enable_analyzer = 1;
29+
Expression
30+
Filter
31+
ReadFromMergeTree
32+
Indexes:
33+
Skip
34+
Name: array_index_2
35+
Description: bloom_filter GRANULARITY 1
36+
Parts: 1/1
37+
Granules: 1/4
38+
SELECT arr
39+
FROM index_test
40+
WHERE has(arrayMap((x, y) -> concat(lower(x), y), arr, arr), 'a_12A_12')
41+
SETTINGS enable_analyzer = 1;
42+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12']
43+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12','A_13']
44+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12','A_13','A_14']
45+
EXPLAIN indexes = 1, description=0
46+
SELECT arr
47+
FROM index_test
48+
WHERE has(arrayMap((x, y) -> concat(lower(x), y, '_', toString(id)), arr, arr), 'a_12A_12_13')
49+
SETTINGS enable_analyzer = 1;
50+
Expression
51+
Filter
52+
ReadFromMergeTree
53+
Indexes:
54+
Skip
55+
Name: array_index_3
56+
Description: bloom_filter GRANULARITY 1
57+
Parts: 1/1
58+
Granules: 1/4
59+
SELECT arr
60+
FROM index_test
61+
WHERE has(arrayMap((x, y) -> concat(lower(x), y, '_', toString(id)), arr, arr), 'a_12A_12_13')
62+
SETTINGS enable_analyzer = 1;
63+
['A_0','A_1','A_2','A_3','A_4','A_5','A_6','A_7','A_8','A_9','A_10','A_11','A_12']
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
-- Tags: no-random-merge-tree-settings, no-random-settings, no-parallel-replicas
2+
3+
-- Recreate the test table from scratch so the test does not depend on earlier runs.
DROP TABLE IF EXISTS index_test;
4+
CREATE TABLE index_test
5+
(
6+
id UInt32,
7+
arr Array(String),
8+
-- Skip index over a single-argument lambda applied to the array column.
INDEX array_index arrayMap(x -> lower(x), arr) TYPE bloom_filter(0.01) GRANULARITY 1,
9+
-- Skip index over a two-argument lambda (both arguments come from `arr`).
INDEX array_index_2 arrayMap((x, y) -> concat(lower(x), y), arr, arr) TYPE bloom_filter(0.01) GRANULARITY 1,
10+
-- Skip index over a two-argument lambda whose body also references the outer column `id`.
INDEX array_index_3 arrayMap((x, y) -> concat(lower(x), y, '_', toString(id)), arr, arr) TYPE bloom_filter(0.01) GRANULARITY 1
11+
)
12+
ENGINE = MergeTree
13+
ORDER BY tuple()
14+
-- index_granularity = 4 keeps granules small, so the 16 rows inserted below span several granules.
SETTINGS allow_suspicious_indices = 1, index_granularity = 4;
15+
16+
-- Insert 16 rows (number = 0..15); each row's array is built from range(number) with every element prefixed by 'A_'.
insert into index_test select number, arrayMap(x -> 'A_' || toString(x) , range(number)) from numbers(16);
17+
18+
-- { echo On }
19+
20+
EXPLAIN indexes = 1, description=0
21+
SELECT arr
22+
FROM index_test
23+
WHERE has(arrayMap(x -> lower(x), arr), lower('a_12'))
24+
SETTINGS enable_analyzer = 1;
25+
26+
SELECT arr
27+
FROM index_test
28+
WHERE has(arrayMap(x -> lower(x), arr), lower('a_12'))
29+
SETTINGS enable_analyzer = 1;
30+
31+
32+
EXPLAIN indexes = 1, description=0
33+
SELECT arr
34+
FROM index_test
35+
WHERE has(arrayMap((x, y) -> concat(lower(x), y), arr, arr), 'a_12A_12')
36+
SETTINGS enable_analyzer = 1;
37+
38+
SELECT arr
39+
FROM index_test
40+
WHERE has(arrayMap((x, y) -> concat(lower(x), y), arr, arr), 'a_12A_12')
41+
SETTINGS enable_analyzer = 1;
42+
43+
EXPLAIN indexes = 1, description=0
44+
SELECT arr
45+
FROM index_test
46+
WHERE has(arrayMap((x, y) -> concat(lower(x), y, '_', toString(id)), arr, arr), 'a_12A_12_13')
47+
SETTINGS enable_analyzer = 1;
48+
49+
SELECT arr
50+
FROM index_test
51+
WHERE has(arrayMap((x, y) -> concat(lower(x), y, '_', toString(id)), arr, arr), 'a_12A_12_13')
52+
SETTINGS enable_analyzer = 1;

0 commit comments

Comments
 (0)