Skip to content

Commit e9a0f5f

Browse files
committed
ORCA: Introduce hash window aggregation when using the vectorization executor
In this PR, ORCA now supports generating `WindowHashAgg` plans, which are already implemented in the vectorization executor. However, the CBDB row executor currently lacks an implementation of the `WindowHashAgg` operator. To prevent ORCA from generating this operator for the row executor, I've added a struct named `OptimizerOptions` to control whether the plan is generated for the row executor or the vectorization executor. (By the way, ORCA may later generate plans specifically for the vectorization executor.) The `WindowAgg` operator implementation in the vectorization executor is: 1. First, sort the input rows by the `ORDER BY` keys 2. Then partition the rows by the `PARTITION BY` keys 3. Finally, compute the window function. Since step 1 must be globally sorted, it cannot be parallelized in the vectorization executor. This results in poor performance of the `WindowAgg` operator. By contrast, `WindowHashAgg` employs a more efficient approach: 1. First, hash the input data into buckets based on the `PARTITION BY` keys 2. Then sort the data `within each bucket` according to the `ORDER BY` keys 3. Finally, compute the window functions on the sorted bucket data. For the row engine, `WindowHashAgg` operators will not be generated. This commit also introduces a new GUC named `optimizer_force_window_hash_agg` to force the generation of plans with `WindowHashAgg` (do not use this GUC except for debugging ORCA). Co-Author-By: zhangyue <zhangyue@hashdata.cn>
1 parent 8b01eaf commit e9a0f5f

File tree

60 files changed

+1264
-156
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+1264
-156
lines changed

contrib/pg_stat_statements/pg_stat_statements.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,8 @@ static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
313313
static PlannedStmt *pgss_planner(Query *parse,
314314
const char *query_string,
315315
int cursorOptions,
316-
ParamListInfo boundParams);
316+
ParamListInfo boundParams,
317+
OptimizerOptions *optimizer_options);
317318
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
318319
static void pgss_ExecutorRun(QueryDesc *queryDesc,
319320
ScanDirection direction,
@@ -866,7 +867,8 @@ static PlannedStmt *
866867
pgss_planner(Query *parse,
867868
const char *query_string,
868869
int cursorOptions,
869-
ParamListInfo boundParams)
870+
ParamListInfo boundParams,
871+
OptimizerOptions *optimizer_options)
870872
{
871873
PlannedStmt *result;
872874

@@ -908,10 +910,10 @@ pgss_planner(Query *parse,
908910
{
909911
if (prev_planner_hook)
910912
result = prev_planner_hook(parse, query_string, cursorOptions,
911-
boundParams);
913+
boundParams, optimizer_options);
912914
else
913915
result = standard_planner(parse, query_string, cursorOptions,
914-
boundParams);
916+
boundParams, optimizer_options);
915917
}
916918
PG_FINALLY();
917919
{
@@ -945,10 +947,10 @@ pgss_planner(Query *parse,
945947
{
946948
if (prev_planner_hook)
947949
result = prev_planner_hook(parse, query_string, cursorOptions,
948-
boundParams);
950+
boundParams, optimizer_options);
949951
else
950952
result = standard_planner(parse, query_string, cursorOptions,
951-
boundParams);
953+
boundParams, optimizer_options);
952954
}
953955

954956
return result;

gpcontrib/pg_hint_plan/pg_hint_plan.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ pg_hint_plan_add_paths_to_joinrel(PlannerInfo *root,
428428
static void *external_plan_hint_hook(Query *parse);
429429
#endif
430430
static PlannedStmt *pg_hint_plan_planner(Query *parse, const char *query_string, int cursorOptions,
431-
ParamListInfo boundParams);
431+
ParamListInfo boundParams, OptimizerOptions *optimizer_options);
432432
static RelOptInfo *pg_hint_plan_join_search(PlannerInfo *root,
433433
int levels_needed,
434434
List *initial_rels);
@@ -3118,7 +3118,7 @@ pg_hint_plan_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
31183118
*/
31193119
static PlannedStmt *
31203120
pg_hint_plan_planner(Query *parse, const char *query_string,
3121-
int cursorOptions, ParamListInfo boundParams)
3121+
int cursorOptions, ParamListInfo boundParams, OptimizerOptions *optimizer_options)
31223122
{
31233123
int save_nestlevel;
31243124
PlannedStmt *result;
@@ -3236,9 +3236,9 @@ pg_hint_plan_planner(Query *parse, const char *query_string,
32363236
}
32373237

32383238
if (prev_planner)
3239-
result = (*prev_planner) (parse, query_string, cursorOptions, boundParams);
3239+
result = (*prev_planner) (parse, query_string, cursorOptions, boundParams, optimizer_options);
32403240
else
3241-
result = standard_planner(parse, query_string, cursorOptions, boundParams);
3241+
result = standard_planner(parse, query_string, cursorOptions, boundParams, optimizer_options);
32423242

32433243
current_hint_str = prev_hint_str;
32443244
recurse_level--;
@@ -3298,9 +3298,9 @@ pg_hint_plan_planner(Query *parse, const char *query_string,
32983298
}
32993299
current_hint_state = NULL;
33003300
if (prev_planner)
3301-
result = (*prev_planner) (parse, query_string, cursorOptions, boundParams);
3301+
result = (*prev_planner) (parse, query_string, cursorOptions, boundParams, optimizer_options);
33023302
else
3303-
result = standard_planner(parse, query_string, cursorOptions, boundParams);
3303+
result = standard_planner(parse, query_string, cursorOptions, boundParams, optimizer_options);
33043304

33053305
/* The upper-level planner still needs the current hint state */
33063306
if (HintStateStack != NIL)

src/backend/cdb/cdbllize.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,7 @@ motion_sanity_walker(Node *node, sanity_result_t *result)
15061506
{
15071507
case T_Result:
15081508
case T_WindowAgg:
1509+
case T_WindowHashAgg:
15091510
case T_TableFunctionScan:
15101511
case T_ShareInputScan:
15111512
case T_Append:

src/backend/cdb/cdbplan.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,25 @@ plan_tree_mutator(Node *node,
694694
return (Node *) newwindow;
695695
}
696696
break;
697+
case T_WindowHashAgg:
698+
{
699+
WindowHashAgg *window = (WindowHashAgg *) node;
700+
WindowHashAgg *newwindow;
701+
702+
FLATCOPY(newwindow, window, WindowHashAgg);
703+
PLANMUTATE(newwindow, window);
704+
705+
COPYARRAY(newwindow, window, partNumCols, partColIdx);
706+
COPYARRAY(newwindow, window, partNumCols, partOperators);
707+
708+
COPYARRAY(newwindow, window, ordNumCols, ordColIdx);
709+
COPYARRAY(newwindow, window, ordNumCols, ordOperators);
710+
MUTATE(newwindow->startOffset, window->startOffset, Node *);
711+
MUTATE(newwindow->endOffset, window->endOffset, Node *);
712+
713+
return (Node *) newwindow;
714+
}
715+
break;
697716

698717
case T_Unique:
699718
{

src/backend/cdb/cdbtargeteddispatch.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ DirectDispatchUpdateContentIdsFromPlan(PlannerInfo *root, Plan *plan)
522522
DisableTargetedDispatch(&dispatchInfo);
523523
break;
524524
case T_WindowAgg:
525+
case T_WindowHashAgg:
525526
case T_TableFunctionScan:
526527
case T_RecursiveUnion:
527528
/* no change to dispatchInfo */

src/backend/gpopt/CGPOptimizer.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ PlannedStmt *
4545
CGPOptimizer::GPOPTOptimizedPlan(
4646
Query *query,
4747
bool *
48-
had_unexpected_failure // output : set to true if optimizer unexpectedly failed to produce plan
48+
had_unexpected_failure, // output : set to true if optimizer unexpectedly failed to produce plan
49+
OptimizerOptions *opts
4950
)
5051
{
5152
SOptContext gpopt_context;
@@ -55,7 +56,7 @@ CGPOptimizer::GPOPTOptimizedPlan(
5556

5657
GPOS_TRY
5758
{
58-
plStmt = COptTasks::GPOPTOptimizedPlan(query, &gpopt_context);
59+
plStmt = COptTasks::GPOPTOptimizedPlan(query, &gpopt_context, opts);
5960
// clean up context
6061
gpopt_context.Free(gpopt_context.epinQuery, gpopt_context.epinPlStmt);
6162
}
@@ -199,9 +200,9 @@ CGPOptimizer::TerminateGPOPT()
199200
//---------------------------------------------------------------------------
200201
extern "C" {
201202
PlannedStmt *
202-
GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure)
203+
GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure, OptimizerOptions *opts)
203204
{
204-
return CGPOptimizer::GPOPTOptimizedPlan(query, had_unexpected_failure);
205+
return CGPOptimizer::GPOPTOptimizedPlan(query, had_unexpected_failure, opts);
205206
}
206207
}
207208

src/backend/gpopt/config/CConfigParamMapping.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,12 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
326326
false, // m_negate_param
327327
GPOS_WSZ_LIT(
328328
"Disable the dynamic seq/bitmap/index scan in partition table")},
329+
330+
{EopttraceEnableWindowHashAgg, &optimizer_force_window_hash_agg,
331+
false, // m_negate_param
332+
GPOS_WSZ_LIT(
333+
"Enable create window hash agg")},
334+
329335
};
330336

331337
//---------------------------------------------------------------------------
@@ -339,7 +345,8 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
339345
CBitSet *
340346
CConfigParamMapping::PackConfigParamInBitset(
341347
CMemoryPool *mp,
342-
ULONG xform_id // number of available xforms
348+
ULONG xform_id, // number of available xforms
349+
BOOL create_vec_plan
343350
)
344351
{
345352
CBitSet *traceflag_bitset = GPOS_NEW(mp) CBitSet(mp, EopttraceSentinel);
@@ -561,6 +568,10 @@ CConfigParamMapping::PackConfigParamInBitset(
561568
GPOPT_DISABLE_XFORM_TF(CXform::ExfRightOuterJoin2HashJoin));
562569
}
563570

571+
if (create_vec_plan) {
572+
traceflag_bitset->ExchangeSet(EopttraceEnableWindowHashAgg);
573+
}
574+
564575
return traceflag_bitset;
565576
}
566577

0 commit comments

Comments
 (0)