1515#include < Parsers/ExpressionListParsers.h>
1616#include < Parsers/parseQuery.h>
1717#include < Parsers/FunctionParameterValuesVisitor.h>
18+ #include < Parsers/formatAST.h>
1819
1920#include < Access/Common/AccessFlags.h>
2021#include < Access/ContextAccess.h>
@@ -646,14 +647,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
646647 if (const auto & column_sizes = storage->getColumnSizes (); !column_sizes.empty () && metadata_snapshot->hasPrimaryKey ())
647648 {
648649 const auto primary_keys = metadata_snapshot->getPrimaryKeyColumns ();
649- const auto main_table_name = getTableName (query.tables ());
650- bool optimized = false ;
651- auto pkoptimized_where_ast = pkOptimization (metadata_snapshot->getProjections (), query.where (), primary_keys, main_table_name, optimized);
652- if (optimized)
653- {
654- query.setExpression (ASTSelectQuery::Expression::WHERE, std::move (pkoptimized_where_ast));
655- options.is_projection_optimized = true ;
656- }
650+ auto pkoptimized_where_ast = pkOptimization (metadata_snapshot->getProjections (), query.where (), primary_keys);
651+ query.setExpression (ASTSelectQuery::Expression::WHERE, std::move (pkoptimized_where_ast));
652+ options.is_projection_optimized = true ;
657653 }
658654 }
659655
@@ -673,6 +669,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
673669 SelectQueryInfo current_info;
674670 current_info.query = query_ptr;
675671 current_info.syntax_analyzer_result = syntax_analyzer_result;
672+ current_info.is_projection_optimized = options.is_projection_optimized ;
676673
677674 Names queried_columns = syntax_analyzer_result->requiredSourceColumns ();
678675 const auto & supported_prewhere_columns = storage->supportedPrewhereColumns ();
@@ -2179,9 +2176,7 @@ bool InterpreterSelectQuery::shouldMoveToPrewhere() const
21792176ASTPtr InterpreterSelectQuery::pkOptimization (
21802177 const ProjectionsDescription & projections,
21812178 const ASTPtr & where_ast,
2182- const Names & primary_keys,
2183- const String & main_table,
2184- bool & optimized) const
2179+ const Names & primary_keys) const
21852180{
21862181 NameSet proj_pks = {};
21872182 for (auto & projection: projections)
@@ -2203,82 +2198,99 @@ ASTPtr InterpreterSelectQuery::pkOptimization(
22032198 return where_ast;
22042199 }
22052200 }
2206-
22072201 }
22082202
2209- const auto and_function = makeASTFunction (" and" );
2210-
2211- // for keys in where_ast
2212- NameSet optimized_where_keys = {};
2213- analyze_where_ast (where_ast, and_function, proj_pks, optimized_where_keys, primary_keys, main_table, optimized);
2214- if (optimized)
2215- {
2216- and_function->arguments ->children .push_back (where_ast->clone ());
2217- return and_function;
2218- }
2219- return where_ast;
2203+ return analyze_where_ast (where_ast, proj_pks, primary_keys);
22202204}
22212205
2222- void InterpreterSelectQuery::analyze_where_ast (
2206+ ASTPtr InterpreterSelectQuery::analyze_where_ast (
22232207 const ASTPtr & ast,
2224- const ASTPtr & func,
22252208 NameSet & proj_pks,
2226- NameSet & optimized_where_keys,
2227- const Names & primary_keys,
2228- const String & main_table,
2229- bool & optimized) const
2209+ const Names & primary_keys) const
22302210{
2231- if (optimized)
2232- return ;
2233-
2234- // Does not support other condition except for "="
2235- if (const auto * ast_function_node = ast->as <ASTFunction>())
2211+ bool contains_pk = false ;
2212+ if (const auto ast_function_node = ast->as <ASTFunction>())
22362213 {
22372214 auto arg_size = ast_function_node->arguments ? ast_function_node->arguments ->children .size () : 0 ;
2215+ // Does not support other condition except for "="
22382216 if (ast_function_node->name == " equals" && arg_size == 2 )
22392217 {
22402218 auto lhs_argument = ast_function_node->arguments ->children .at (0 );
22412219 auto rhs_argument = ast_function_node->arguments ->children .at (1 );
22422220 String lhs = getIdentifier (lhs_argument);
22432221 String rhs = getIdentifier (rhs_argument);
22442222 auto col_name = (!lhs.empty ()) ? lhs:rhs;
2245- bool contains_pk = false ;
2223+ contains_pk = false ;
22462224 if (std::find (primary_keys.begin (), primary_keys.end (), col_name) != primary_keys.end ())
22472225 contains_pk = true ;
22482226
2249- if (proj_pks.contains (col_name) && !optimized_where_keys. contains (col_name) && ! contains_pk)
2227+ if (proj_pks.contains (col_name) && !contains_pk)
22502228 {
2251- optimized_where_keys.insert (col_name);
2252- ASTPtr new_ast = create_proj_optimized_ast (ast, primary_keys, main_table);
2253- auto * function_node = func->as <ASTFunction>();
2254- function_node->arguments ->children .push_back (new_ast);
2255- optimized = true ;
2256- return ;
2229+ ASTPtr rewrite_ast = create_proj_optimized_ast (ast, primary_keys);
2230+ auto and_func = makeASTFunction (" and" , std::move (rewrite_ast), ast->clone ());
2231+ return and_func;
2232+ }
2233+ }
2234+ else if (ast_function_node->name == " in" )
2235+ {
2236+ ASTPtr rewrite_ast;
2237+ const auto * subquery = ast_function_node->arguments ->children [1 ]->as <ASTSubquery>();
2238+ if (!subquery)
2239+ {
2240+ auto lhs_argument = ast_function_node->arguments ->children .at (0 );
2241+ contains_pk = false ;
2242+ bool proj_pks_contains = false ;
2243+ if (auto func = lhs_argument->as <ASTFunction>())
2244+ {
2245+ if (func->name == " tuple" )
2246+ {
2247+ for (auto key : func->arguments ->children )
2248+ {
2249+ String col_name = getIdentifier (key);
2250+ if (std::find (primary_keys.begin (), primary_keys.end (), col_name) != primary_keys.end ())
2251+ contains_pk = true ;
2252+ if (proj_pks.contains (col_name))
2253+ proj_pks_contains = true ;
2254+ }
2255+ }
2256+ }
2257+ else
2258+ {
2259+ String col_name = getIdentifier (lhs_argument);
2260+ if (std::find (primary_keys.begin (), primary_keys.end (), col_name) != primary_keys.end ())
2261+ contains_pk = true ;
2262+ if (proj_pks.contains (col_name))
2263+ proj_pks_contains = true ;
2264+ }
2265+
2266+ if (proj_pks_contains && !contains_pk)
2267+ {
2268+ rewrite_ast = create_proj_optimized_ast (ast, primary_keys);
2269+ auto and_func = makeASTFunction (" and" , std::move (rewrite_ast), ast->clone ());
2270+ return and_func;
2271+ }
22572272 }
22582273 }
22592274 else if (ast_function_node->name == " and" || ast_function_node->name == " or" )
22602275 {
2276+ auto current_func = makeASTFunction (ast_function_node->name );
22612277 for (size_t i = 0 ; i < arg_size; i++)
22622278 {
22632279 auto argument = ast_function_node->arguments ->children [i];
2264- analyze_where_ast (argument, func, proj_pks, optimized_where_keys, primary_keys, main_table, optimized);
2280+ auto new_ast = analyze_where_ast (argument, proj_pks, primary_keys);
2281+ current_func->arguments ->children .push_back (std::move (new_ast));
22652282 }
2266- }
2267- else /* TBD: conditions that are not "=" */
2268- {
2269- return ;
2283+ return current_func;
22702284 }
22712285 }
2272- else
2273- {
2274- return ;
2275- }
2286+ return ast;
22762287}
22772288
22782289/* *
22792290 * @brief Manually rewrite the WHERE query, Insert a new where condition in order to
22802291 * leverage projection features
22812292 *
2293+ * Storage is not empty while calling this function
22822294 * For example, a qualified table with projection
22832295 * CREATE TABLE test_a(`src` String,`dst` String, `other_cols` String,
22842296 * PROJECTION p1(SELECT src, dst ORDER BY dst)) ENGINE = MergeTree ORDER BY src;
@@ -2289,7 +2301,7 @@ void InterpreterSelectQuery::analyze_where_ast(
22892301 * The following code will convert this select query to the following
22902302 * select * from test_a where src in (select src from test_a where dst='-42') and dst='-42';
22912303 */
2292- ASTPtr InterpreterSelectQuery::create_proj_optimized_ast (const ASTPtr & ast, const Names & primary_keys, const String & main_table ) const
2304+ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast (const ASTPtr & ast, const Names & primary_keys) const
22932305{
22942306 auto select_query = std::make_shared<ASTSelectQuery>();
22952307 select_query->setExpression (ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
@@ -2299,7 +2311,7 @@ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, con
22992311 auto tables_elem = std::make_shared<ASTTablesInSelectQueryElement>();
23002312 auto table_expr = std::make_shared<ASTTableExpression>();
23012313
2302- table_expr->database_and_table_name = std::make_shared<ASTTableIdentifier>(main_table );
2314+ table_expr->database_and_table_name = std::make_shared<ASTTableIdentifier>(table_id );
23032315 table_expr->children .push_back (table_expr->database_and_table_name );
23042316
23052317 tables_elem->table_expression = std::move (table_expr);
@@ -2311,6 +2323,7 @@ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, con
23112323 select_query->setExpression (ASTSelectQuery::Expression::WHERE, ast->clone ());
23122324
23132325 select_with_union_query->list_of_selects ->children .push_back (select_query);
2326+ select_with_union_query->children .push_back (select_with_union_query->list_of_selects );
23142327
23152328 auto subquery = std::make_shared<ASTSubquery>();
23162329 subquery->children .push_back (select_with_union_query);
@@ -2332,8 +2345,8 @@ ASTPtr InterpreterSelectQuery::create_proj_optimized_ast(const ASTPtr & ast, con
23322345 in_function->arguments ->children .push_back (tuples);
23332346 }
23342347 in_function->arguments ->children .push_back (subquery);
2335- const auto indexHintFunc = makeASTFunction ( " indexHint " , in_function);
2336- return indexHintFunc ;
2348+
2349+ return makeASTFunction ( " indexHint " , std::move (in_function)) ;
23372350}
23382351
23392352// / Note that this is const and accepts the analysis ref to be able to use it to do analysis for parallel replicas
@@ -3511,14 +3524,4 @@ String InterpreterSelectQuery::getIdentifier(ASTPtr & argument) const
35113524 return getIdentifier (argument->children .at (0 ));
35123525}
35133526
3514- String InterpreterSelectQuery::getTableName (const ASTPtr & tables_in_select_query_ast) const
3515- {
3516- const auto & tables_in_select_query = tables_in_select_query_ast->as <ASTTablesInSelectQuery &>();
3517- const auto & tables_element = tables_in_select_query.children [0 ]->as <ASTTablesInSelectQueryElement &>();
3518- const auto & table_expression = tables_element.table_expression ->as <ASTTableExpression &>();
3519- auto table_name = table_expression.database_and_table_name ->as <ASTTableIdentifier>()->getTableId ().table_name ;
3520-
3521- return table_name;
3522- }
3523-
35243527}
0 commit comments