Skip to content

Commit 9e1f5ec

Browse files
Kusto phase 3 Projection Optimization (#413)
* Add db info to optimized query * fix OR logic in projection optimization * remove logic for preventing optimize same key, every predicates will be optimized * fix repeated optimization * read 'in' function for projection optimization * add add/or cases * add in(<constant value>) projection optimization support * revert back "in" subquery support * fixed data integrity issue, introduce is_projection_optimized flag to expressionAnalyzer * Added more test cases
1 parent 78badad commit 9e1f5ec

File tree

9 files changed

+165
-81
lines changed

9 files changed

+165
-81
lines changed

src/Interpreters/ExpressionAnalyzer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::nonc
109109
{
110110
}
111111

112+
ExpressionAnalyzer(const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_, bool is_projection_optimized_)
113+
: ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false, false, {}, false, is_projection_optimized_)
114+
{
115+
}
116+
112117
~ExpressionAnalyzer();
113118

114119
void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);

src/Interpreters/InterpreterSelectQuery.cpp

Lines changed: 68 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <Parsers/ExpressionListParsers.h>
1616
#include <Parsers/parseQuery.h>
1717
#include <Parsers/FunctionParameterValuesVisitor.h>
18+
#include <Parsers/formatAST.h>
1819

1920
#include <Access/Common/AccessFlags.h>
2021
#include <Access/ContextAccess.h>
@@ -646,14 +647,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
646647
if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty() && metadata_snapshot->hasPrimaryKey())
647648
{
648649
const auto primary_keys = metadata_snapshot->getPrimaryKeyColumns();
649-
const auto main_table_name = getTableName(query.tables());
650-
bool optimized = false;
651-
auto pkoptimized_where_ast = pkOptimization(metadata_snapshot->getProjections(), query.where(), primary_keys, main_table_name, optimized);
652-
if (optimized)
653-
{
654-
query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(pkoptimized_where_ast));
655-
options.is_projection_optimized = true;
656-
}
650+
auto pkoptimized_where_ast = pkOptimization(metadata_snapshot->getProjections(), query.where(), primary_keys);
651+
query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(pkoptimized_where_ast));
652+
options.is_projection_optimized = true;
657653
}
658654
}
659655

@@ -673,6 +669,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
673669
SelectQueryInfo current_info;
674670
current_info.query = query_ptr;
675671
current_info.syntax_analyzer_result = syntax_analyzer_result;
672+
current_info.is_projection_optimized = options.is_projection_optimized;
676673

677674
Names queried_columns = syntax_analyzer_result->requiredSourceColumns();
678675
const auto & supported_prewhere_columns = storage->supportedPrewhereColumns();
@@ -2179,9 +2176,7 @@ bool InterpreterSelectQuery::shouldMoveToPrewhere() const
21792176
ASTPtr InterpreterSelectQuery::pkOptimization(
21802177
const ProjectionsDescription & projections,
21812178
const ASTPtr & where_ast,
2182-
const Names & primary_keys,
2183-
const String & main_table,
2184-
bool & optimized) const
2179+
const Names & primary_keys) const
21852180
{
21862181
NameSet proj_pks = {};
21872182
for (auto & projection: projections)
@@ -2203,82 +2198,99 @@ ASTPtr InterpreterSelectQuery::pkOptimization(
22032198
return where_ast;
22042199
}
22052200
}
2206-
22072201
}
22082202

2209-
const auto and_function = makeASTFunction("and");
2210-
2211-
//for keys in where_ast
2212-
NameSet optimized_where_keys = {};
2213-
analyze_where_ast(where_ast, and_function, proj_pks, optimized_where_keys, primary_keys, main_table, optimized);
2214-
if (optimized)
2215-
{
2216-
and_function->arguments->children.push_back(where_ast->clone());
2217-
return and_function;
2218-
}
2219-
return where_ast;
2203+
return analyze_where_ast(where_ast, proj_pks, primary_keys);
22202204
}
22212205

2222-
void InterpreterSelectQuery::analyze_where_ast(
2206+
ASTPtr InterpreterSelectQuery::analyze_where_ast(
22232207
const ASTPtr & ast,
2224-
const ASTPtr & func,
22252208
NameSet & proj_pks,
2226-
NameSet & optimized_where_keys,
2227-
const Names & primary_keys,
2228-
const String & main_table,
2229-
bool & optimized) const
2209+
const Names & primary_keys) const
22302210
{
2231-
if (optimized)
2232-
return;
2233-
2234-
// Does not support other condition except for "="
2235-
if (const auto * ast_function_node = ast->as<ASTFunction>())
2211+
bool contains_pk = false;
2212+
if (const auto ast_function_node = ast->as<ASTFunction>())
22362213
{
22372214
auto arg_size = ast_function_node->arguments ? ast_function_node->arguments->children.size() : 0;
2215+
// Does not support other condition except for "="
22382216
if (ast_function_node->name == "equals" && arg_size == 2)
22392217
{
22402218
auto lhs_argument = ast_function_node->arguments->children.at(0);
22412219
auto rhs_argument = ast_function_node->arguments->children.at(1);
22422220
String lhs = getIdentifier(lhs_argument);
22432221
String rhs = getIdentifier(rhs_argument);
22442222
auto col_name = (!lhs.empty()) ? lhs:rhs;
2245-
bool contains_pk = false;
2223+
contains_pk = false;
22462224
if (std::find(primary_keys.begin(), primary_keys.end(), col_name) != primary_keys.end())
22472225
contains_pk = true;
22482226

2249-
if (proj_pks.contains(col_name) && !optimized_where_keys.contains(col_name) && !contains_pk)
2227+
if (proj_pks.contains(col_name) && !contains_pk)
22502228
{
2251-
optimized_where_keys.insert(col_name);
2252-
ASTPtr new_ast = create_proj_optimized_ast(ast, primary_keys, main_table);
2253-
auto * function_node = func->as<ASTFunction>();
2254-
function_node->arguments->children.push_back(new_ast);
2255-
optimized = true;
2256-
return;
2229+
ASTPtr rewrite_ast = create_proj_optimized_ast(ast, primary_keys);
2230+
auto and_func = makeASTFunction("and", std::move(rewrite_ast), ast->clone());
2231+
return and_func;
2232+
}
2233+
}
2234+
else if (ast_function_node->name == "in")
2235+
{
2236+
ASTPtr rewrite_ast;
2237+
const auto * subquery = ast_function_node->arguments->children[1]->as<ASTSubquery>();
2238+
if (!subquery)
2239+
{
2240+
auto lhs_argument = ast_function_node->arguments->children.at(0);
2241+
contains_pk = false;
2242+
bool proj_pks_contains = false;
2243+
if (auto func = lhs_argument->as<ASTFunction>())
2244+
{
2245+
if (func->name == "tuple")
2246+
{
2247+
for (auto key : func->arguments->children)
2248+
{
2249+
String col_name = getIdentifier(key);
2250+
if (std::find(primary_keys.begin(), primary_keys.end(), col_name) != primary_keys.end())
2251+
contains_pk = true;
2252+
if (proj_pks.contains(col_name))
2253+
proj_pks_contains = true;
2254+
}
2255+
}
2256+
}
2257+
else
2258+
{
2259+
String col_name = getIdentifier(lhs_argument);
2260+
if (std::find(primary_keys.begin(), primary_keys.end(), col_name) != primary_keys.end())
2261+
contains_pk = true;
2262+
if (proj_pks.contains(col_name))
2263+
proj_pks_contains = true;
2264+
}
2265+
2266+
if (proj_pks_contains && !contains_pk)
2267+
{
2268+
rewrite_ast = create_proj_optimized_ast(ast, primary_keys);
2269+
auto and_func = makeASTFunction("and", std::move(rewrite_ast), ast->clone());
2270+
return and_func;
2271+
}
22572272
}
22582273
}
22592274
else if (ast_function_node->name == "and" || ast_function_node->name == "or")
22602275
{
2276+
auto current_func = makeASTFunction(ast_function_node->name);
22612277
for (size_t i = 0; i < arg_size; i++)
22622278
{
22632279
auto argument = ast_function_node->arguments->children[i];
2264-
analyze_where_ast(argument, func, proj_pks, optimized_where_keys, primary_keys, main_table, optimized);
2280+
auto new_ast = analyze_where_ast(argument, proj_pks, primary_keys);
2281+
current_func->arguments->children.push_back(std::move(new_ast));
22652282
}
2266-
}
2267-
else /* TBD: conditions that are not "=" */
2268-
{
2269-
return;
2283+
return current_func;
22702284
}
22712285
}
2272-
else
2273-
{
2274-
return;
2275-
}
2286+
return ast;
22762287
}
22772288

22782289
/**
22792290
* @brief Manually rewrite the WHERE query, Insert a new where condition in order to
22802291
* leverage projection features
22812292
*
2293+
* Storage is not empty while calling this function
22822294
* For example, a qualified table with projection
22832295
* CREATE TABLE test_a(`src` String,`dst` String, `other_cols` String,
22842296
* PROJECTION p1(SELECT src, dst ORDER BY dst)) ENGINE = MergeTree ORDER BY src;
@@ -2289,7 +2301,7 @@ void InterpreterSelectQuery::analyze_where_ast(
22892301
* The following code will convert this select query to the following
22902302
* select * from test_a where src in (select src from test_a where dst='-42') and dst='-42';
22912303
*/
2292-
ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, const Names & primary_keys, const String & main_table) const
2304+
ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, const Names & primary_keys) const
22932305
{
22942306
auto select_query = std::make_shared<ASTSelectQuery>();
22952307
select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
@@ -2299,7 +2311,7 @@ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, con
22992311
auto tables_elem = std::make_shared<ASTTablesInSelectQueryElement>();
23002312
auto table_expr = std::make_shared<ASTTableExpression>();
23012313

2302-
table_expr->database_and_table_name = std::make_shared<ASTTableIdentifier>(main_table);
2314+
table_expr->database_and_table_name = std::make_shared<ASTTableIdentifier>(table_id);
23032315
table_expr->children.push_back(table_expr->database_and_table_name);
23042316

23052317
tables_elem->table_expression = std::move(table_expr);
@@ -2311,6 +2323,7 @@ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, con
23112323
select_query->setExpression(ASTSelectQuery::Expression::WHERE, ast->clone());
23122324

23132325
select_with_union_query->list_of_selects->children.push_back(select_query);
2326+
select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
23142327

23152328
auto subquery = std::make_shared<ASTSubquery>();
23162329
subquery->children.push_back(select_with_union_query);
@@ -2332,8 +2345,8 @@ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, con
23322345
in_function->arguments->children.push_back(tuples);
23332346
}
23342347
in_function->arguments->children.push_back(subquery);
2335-
const auto indexHintFunc = makeASTFunction("indexHint", in_function);
2336-
return indexHintFunc;
2348+
2349+
return makeASTFunction("indexHint", std::move(in_function));
23372350
}
23382351

23392352
/// Note that this is const and accepts the analysis ref to be able to use it to do analysis for parallel replicas
@@ -3511,14 +3524,4 @@ String InterpreterSelectQuery::getIdentifier(ASTPtr & argument) const
35113524
return getIdentifier(argument->children.at(0));
35123525
}
35133526

3514-
String InterpreterSelectQuery::getTableName(const ASTPtr & tables_in_select_query_ast) const
3515-
{
3516-
const auto & tables_in_select_query = tables_in_select_query_ast->as<ASTTablesInSelectQuery &>();
3517-
const auto & tables_element = tables_in_select_query.children[0]->as<ASTTablesInSelectQueryElement &>();
3518-
const auto & table_expression = tables_element.table_expression->as<ASTTableExpression &>();
3519-
auto table_name = table_expression.database_and_table_name->as<ASTTableIdentifier>()->getTableId().table_name;
3520-
3521-
return table_name;
3522-
}
3523-
35243527
}

src/Interpreters/InterpreterSelectQuery.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,10 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery
164164

165165
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
166166

167-
ASTPtr pkOptimization(const ProjectionsDescription & projections, const ASTPtr & where_ast, const Names & primary_keys, const String & main_table, bool & optimized) const;
168-
ASTPtr create_proj_optimized_ast(const ASTPtr & ast, const Names & primary_keys, const String & main_table) const;
167+
ASTPtr pkOptimization(const ProjectionsDescription & projections, const ASTPtr & where_ast, const Names & primary_keys) const;
168+
ASTPtr create_proj_optimized_ast(const ASTPtr & ast, const Names & primary_keys) const;
169169

170-
void analyze_where_ast(const ASTPtr & ast, const ASTPtr & func, NameSet & proj_pks, NameSet & optimized_where_keys, const Names & primary_keys, const String & main_table, bool & optimized) const;
170+
ASTPtr analyze_where_ast(const ASTPtr & ast, NameSet & proj_pks, const Names & primary_keys) const;
171171
void addPrewhereAliasActions();
172172
void applyFiltersToPrewhereInAnalysis(ExpressionAnalysisResult & analysis) const;
173173
bool shouldMoveToPrewhere() const;
@@ -203,7 +203,6 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery
203203
std::optional<UInt64> getTrivialCount(UInt64 max_parallel_replicas);
204204
/// Check if we can limit block size to read based on LIMIT clause
205205
UInt64 maxBlockSizeByLimit() const;
206-
String getTableName(const ASTPtr & tables_in_select_query) const;
207206
String getIdentifier(ASTPtr & argument) const;
208207

209208
enum class Modificator

src/Storages/MergeTree/KeyCondition.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawCon
698698
* For index to work when something like "WHERE Date = toDate(now())" is written.
699699
*/
700700
Block KeyCondition::getBlockWithConstants(
701-
const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, ContextPtr context)
701+
const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, ContextPtr context, bool is_projection_optimized)
702702
{
703703
Block result
704704
{
@@ -707,7 +707,7 @@ Block KeyCondition::getBlockWithConstants(
707707

708708
if (syntax_analyzer_result)
709709
{
710-
auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActionsDAG();
710+
auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, context, is_projection_optimized).getConstActionsDAG();
711711
for (const auto & action_node : actions->getOutputs())
712712
{
713713
if (action_node->column)

src/Storages/MergeTree/KeyCondition.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ class KeyCondition
151151
static Block getBlockWithConstants(
152152
const ASTPtr & query,
153153
const TreeRewriterResultPtr & syntax_analyzer_result,
154-
ContextPtr context);
154+
ContextPtr context,
155+
bool is_projection_optimized = false);
155156

156157
static std::optional<Range> applyMonotonicFunctionsChainToRange(
157158
Range key_range,

src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
8484

8585
auto block_with_constants = KeyCondition::getBlockWithConstants(select_query_info.query->clone(),
8686
select_query_info.syntax_analyzer_result,
87-
context);
87+
context,
88+
select_query_info.is_projection_optimized);
8889

8990
WhereOptimizerContext where_optimizer_context;
9091
where_optimizer_context.context = context;

src/Storages/SelectQueryInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ struct SelectQueryInfo
249249
bool merge_tree_empty_result = false;
250250
bool settings_limit_offset_done = false;
251251
bool is_internal = false;
252+
bool is_projection_optimized = false;
252253
Block minmax_count_projection_block;
253254
MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr;
254255

0 commit comments

Comments
 (0)