Skip to content

Commit 8374618

Browse files
committed
Introduce hash windows aggregation when enable vectorization
1 parent 7ec9bdf commit 8374618

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+731
-39
lines changed

src/backend/gpopt/CGPOptimizer.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ PlannedStmt *
4545
CGPOptimizer::GPOPTOptimizedPlan(
4646
Query *query,
4747
bool *
48-
had_unexpected_failure // output : set to true if optimizer unexpectedly failed to produce plan
48+
had_unexpected_failure, // output : set to true if optimizer unexpectedly failed to produce plan
49+
OptimizerOptions *opts
4950
)
5051
{
5152
SOptContext gpopt_context;
@@ -55,7 +56,7 @@ CGPOptimizer::GPOPTOptimizedPlan(
5556

5657
GPOS_TRY
5758
{
58-
plStmt = COptTasks::GPOPTOptimizedPlan(query, &gpopt_context);
59+
plStmt = COptTasks::GPOPTOptimizedPlan(query, &gpopt_context, opts);
5960
// clean up context
6061
gpopt_context.Free(gpopt_context.epinQuery, gpopt_context.epinPlStmt);
6162
}
@@ -199,9 +200,9 @@ CGPOptimizer::TerminateGPOPT()
199200
//---------------------------------------------------------------------------
200201
extern "C" {
201202
PlannedStmt *
202-
GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure)
203+
GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure, OptimizerOptions *opts)
203204
{
204-
return CGPOptimizer::GPOPTOptimizedPlan(query, had_unexpected_failure);
205+
return CGPOptimizer::GPOPTOptimizedPlan(query, had_unexpected_failure, opts);
205206
}
206207
}
207208

src/backend/gpopt/config/CConfigParamMapping.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,12 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
326326
false, // m_negate_param
327327
GPOS_WSZ_LIT(
328328
"Disable the dynamic seq/bitmap/index scan in partition table")},
329+
330+
{EopttraceEnableWindowHashAgg, &optimizer_enable_window_hash_agg,
331+
false, // m_negate_param
332+
GPOS_WSZ_LIT(
333+
"Enable create window hash agg")},
334+
329335
};
330336

331337
//---------------------------------------------------------------------------
@@ -339,7 +345,8 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
339345
CBitSet *
340346
CConfigParamMapping::PackConfigParamInBitset(
341347
CMemoryPool *mp,
342-
ULONG xform_id // number of available xforms
348+
ULONG xform_id, // number of available xforms
349+
BOOL create_vec_plan
343350
)
344351
{
345352
CBitSet *traceflag_bitset = GPOS_NEW(mp) CBitSet(mp, EopttraceSentinel);
@@ -561,6 +568,10 @@ CConfigParamMapping::PackConfigParamInBitset(
561568
GPOPT_DISABLE_XFORM_TF(CXform::ExfRightOuterJoin2HashJoin));
562569
}
563570

571+
if (create_vec_plan) {
572+
traceflag_bitset->ExchangeSet(EopttraceEnableWindowHashAgg);
573+
}
574+
564575
return traceflag_bitset;
565576
}
566577

src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3336,6 +3336,8 @@ CTranslatorDXLToPlStmt::TranslateDXLWindow(
33363336
}
33373337
}
33383338

3339+
window->isWindowHashAgg = window_dxlop->IsWindowHashAgg();
3340+
33393341
SetParamIds(plan);
33403342

33413343
// cleanup

src/backend/gpopt/utils/COptTasks.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,7 @@ COptTasks::OptimizeTask(void *ptr)
913913
{
914914
// set trace flags
915915
trace_flags = CConfigParamMapping::PackConfigParamInBitset(
916-
mp, CXform::ExfSentinel);
916+
mp, CXform::ExfSentinel, opt_ctxt->m_create_vec_plan);
917917
SetTraceflags(mp, trace_flags, &enabled_trace_flags,
918918
&disabled_trace_flags);
919919

@@ -1156,13 +1156,15 @@ COptTasks::Optimize(Query *query)
11561156
//
11571157
//---------------------------------------------------------------------------
11581158
PlannedStmt *
1159-
COptTasks::GPOPTOptimizedPlan(Query *query, SOptContext *gpopt_context)
1159+
COptTasks::GPOPTOptimizedPlan(Query *query, SOptContext *gpopt_context, OptimizerOptions *opts)
11601160
{
11611161
Assert(query);
11621162
Assert(gpopt_context);
11631163

11641164
gpopt_context->m_query = query;
11651165
gpopt_context->m_should_generate_plan_stmt = true;
1166+
// Copy options in `OptimizerOptions` to `SOptContext`
1167+
gpopt_context->m_create_vec_plan = opts->create_vectorization_plan;
11661168
Execute(&OptimizeTask, gpopt_context);
11671169
return gpopt_context->m_plan_stmt;
11681170
}

src/backend/gporca/libgpdbcost/include/gpdbcost/CCostModelGPDB.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ class CCostModelGPDB : public ICostModel
107107
const CCostModelGPDB *pcmgpdb,
108108
const SCostingInfo *pci);
109109

110+
// cost of hash sequence project
111+
static CCost CostHashSequenceProject(CMemoryPool *mp,
112+
CExpressionHandle &exprhdl,
113+
const CCostModelGPDB *pcmgpdb,
114+
const SCostingInfo *pci);
115+
110116
// cost of CTE producer
111117
static CCost CostCTEProducer(CMemoryPool *mp, CExpressionHandle &exprhdl,
112118
const CCostModelGPDB *pcmgpdb,

src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "gpopt/operators/CPhysicalMotionBroadcast.h"
3232
#include "gpopt/operators/CPhysicalPartitionSelector.h"
3333
#include "gpopt/operators/CPhysicalSequenceProject.h"
34+
#include "gpopt/operators/CPhysicalHashSequenceProject.h"
3435
#include "gpopt/operators/CPhysicalStreamAgg.h"
3536
#include "gpopt/operators/CPhysicalUnionAll.h"
3637
#include "gpopt/operators/CPredicateUtils.h"
@@ -1594,11 +1595,17 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
15941595
GPOS_ASSERT(COperator::EopPhysicalSequenceProject ==
15951596
exprhdl.Pop()->Eopid());
15961597

1598+
const CDouble dTupDefaultProcCostUnit =
1599+
pcmgpdb->GetCostModelParams()
1600+
->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)
1601+
->Get();
1602+
GPOS_ASSERT(0 < dTupDefaultProcCostUnit);
1603+
15971604
CPhysicalSequenceProject *psp = CPhysicalSequenceProject::PopConvert(exprhdl.Pop());
15981605

1599-
if (GPOS_FTRACE(EopttraceForceSplitWindowFunc) &&
1606+
if (GPOS_FTRACE(EopttraceForceSplitWindowFunc) &&
16001607
psp->Pspt() == COperator::EsptypeGlobalTwoStep) {
1601-
return CCost(0);
1608+
return CCost(dTupDefaultProcCostUnit * 2);
16021609
}
16031610

16041611
const DOUBLE num_rows_outer = pci->PdRows()[0];
@@ -1614,12 +1621,59 @@ CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
16141621
ulSortCols += pos->UlSortColumns();
16151622
}
16161623

1624+
// we process (sorted window of) input tuples to compute window function values
1625+
CCost costLocal =
1626+
CCost(pci->NumRebinds() * (ulSortCols * num_rows_outer * dWidthOuter *
1627+
dTupDefaultProcCostUnit));
1628+
CCost costChild =
1629+
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
1630+
1631+
return costLocal + costChild;
1632+
}
1633+
1634+
//---------------------------------------------------------------------------
1635+
// @function:
1636+
// CCostModelGPDB::CostSequenceProject
1637+
//
1638+
// @doc:
1639+
// Cost of sequence project
1640+
//
1641+
//---------------------------------------------------------------------------
1642+
CCost
1643+
CCostModelGPDB::CostHashSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
1644+
const CCostModelGPDB *pcmgpdb,
1645+
const SCostingInfo *pci)
1646+
{
1647+
GPOS_ASSERT(nullptr != pcmgpdb);
1648+
GPOS_ASSERT(nullptr != pci);
1649+
GPOS_ASSERT(COperator::EopPhysicalHashSequenceProject ==
1650+
exprhdl.Pop()->Eopid());
16171651
const CDouble dTupDefaultProcCostUnit =
16181652
pcmgpdb->GetCostModelParams()
16191653
->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)
16201654
->Get();
16211655
GPOS_ASSERT(0 < dTupDefaultProcCostUnit);
16221656

1657+
CPhysicalHashSequenceProject *psp = CPhysicalHashSequenceProject::PopConvert(exprhdl.Pop());
1658+
1659+
if (GPOS_FTRACE(EopttraceForceSplitWindowFunc) &&
1660+
psp->Pspt() == COperator::EsptypeGlobalTwoStep) {
1661+
return CCost(dTupDefaultProcCostUnit);
1662+
}
1663+
1664+
const DOUBLE num_rows_outer = pci->PdRows()[0];
1665+
const DOUBLE dWidthOuter = pci->GetWidth()[0];
1666+
1667+
ULONG ulSortCols = 0;
1668+
COrderSpecArray *pdrgpos =
1669+
CPhysicalHashSequenceProject::PopConvert(psp)->Pdrgpos();
1670+
const ULONG ulOrderSpecs = pdrgpos->Size();
1671+
for (ULONG ul = 0; ul < ulOrderSpecs; ul++)
1672+
{
1673+
COrderSpec *pos = (*pdrgpos)[ul];
1674+
ulSortCols += pos->UlSortColumns();
1675+
}
1676+
16231677
// we process (sorted window of) input tuples to compute window function values
16241678
CCost costLocal =
16251679
CCost(pci->NumRebinds() * (ulSortCols * num_rows_outer * dWidthOuter *
@@ -2462,18 +2516,26 @@ CCostModelGPDB::Cost(
24622516
return CostSequenceProject(m_mp, exprhdl, this, pci);
24632517
}
24642518

2519+
case COperator::EopPhysicalHashSequenceProject:
2520+
{
2521+
return CostHashSequenceProject(m_mp, exprhdl, this, pci);
2522+
}
2523+
24652524
case COperator::EopPhysicalCTEProducer:
24662525
{
24672526
return CostCTEProducer(m_mp, exprhdl, this, pci);
24682527
}
2528+
24692529
case COperator::EopPhysicalCTEConsumer:
24702530
{
24712531
return CostCTEConsumer(m_mp, exprhdl, this, pci);
24722532
}
2533+
24732534
case COperator::EopPhysicalConstTableGet:
24742535
{
24752536
return CostConstTableGet(m_mp, exprhdl, this, pci);
24762537
}
2538+
24772539
case COperator::EopPhysicalDML:
24782540
{
24792541
return CostDML(m_mp, exprhdl, this, pci);

src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpec.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ class CDistributionSpec : public CPropSpec
179179
}
180180
// print
181181
IOstream &OsPrint(IOstream &os) const override = 0;
182+
virtual IOstream &OsPrintNoWrap(IOstream &os) const{
183+
return OsPrint(os);
184+
}
182185

183186
// return distribution partitioning type
184187
virtual EDistributionPartitioningType Edpt() const = 0;

src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpecHashed.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@ class CDistributionSpecHashed : public CDistributionSpecRandom
178178

179179
// print
180180
IOstream &OsPrint(IOstream &os) const override;
181-
IOstream &OsPrintWithPrefix(IOstream &os, const char *prefix) const;
181+
IOstream &OsPrintNoWrap(IOstream &os) const override;
182+
IOstream &OsPrintWithPrefix(IOstream &os, const char *prefix, BOOL wrap = true) const;
182183

183184
// return a hashed distribution on the maximal hashable subset of given columns
184185
static CDistributionSpecHashed *PdshashedMaximal(CMemoryPool *mp,

src/backend/gporca/libgpopt/include/gpopt/operators/COperator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ class COperator : public CRefCount, public DbgPrintMixin<COperator>
213213
EopPhysicalCTEProducer,
214214
EopPhysicalCTEConsumer,
215215
EopPhysicalSequenceProject,
216+
EopPhysicalHashSequenceProject,
216217
EopPhysicalDynamicIndexScan,
217218

218219
EopPhysicalInnerHashJoin,
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*
20+
* CPhysicalHashSequenceProject.h
21+
*
22+
* IDENTIFICATION
23+
* src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalHashSequenceProject.h
24+
*
25+
*-------------------------------------------------------------------------
26+
*/
27+
28+
#ifndef GPOPT_CPhysicalHashSequenceProject_H
29+
#define GPOPT_CPhysicalHashSequenceProject_H
30+
31+
#include "gpos/base.h"
32+
33+
#include "gpopt/base/CWindowFrame.h"
34+
#include "gpopt/operators/CPhysicalSequenceProject.h"
35+
36+
namespace gpopt
37+
{
38+
// fwd declarations
39+
class CDistributionSpec;
40+
41+
//---------------------------------------------------------------------------
42+
// @class:
43+
// CPhysicalHashSequenceProject
44+
//
45+
// @doc:
46+
// Physical Hash Sequence Project operator
47+
//
48+
//---------------------------------------------------------------------------
49+
class CPhysicalHashSequenceProject : public CPhysicalSequenceProject
50+
{
51+
public:
52+
CPhysicalHashSequenceProject(const CPhysicalHashSequenceProject &) = delete;
53+
54+
// ctor
55+
CPhysicalHashSequenceProject(CMemoryPool *mp, ESPType m_sptype,
56+
CDistributionSpec *pds,
57+
COrderSpecArray *pdrgpos,
58+
CWindowFrameArray *pdrgpwf);
59+
60+
// dtor
61+
~CPhysicalHashSequenceProject() override;
62+
63+
// ident accessors
64+
EOperatorId
65+
Eopid() const override
66+
{
67+
return EopPhysicalHashSequenceProject;
68+
}
69+
70+
// operator name
71+
const CHAR *
72+
SzId() const override
73+
{
74+
return "CPhysicalHashSequenceProject";
75+
}
76+
77+
// match function
78+
BOOL Matches(COperator *pop) const override;
79+
80+
// hashing function
81+
ULONG HashValue() const override;
82+
83+
// sensitivity to order of inputs
84+
BOOL FInputOrderSensitive() const override;
85+
86+
// compute required sort order of the n-th child
87+
COrderSpec *PosRequired(CMemoryPool *mp, CExpressionHandle &exprhdl,
88+
COrderSpec *posRequired, ULONG child_index,
89+
CDrvdPropArray *pdrgpdpCtxt,
90+
ULONG ulOptReq) const override;
91+
92+
// return order property enforcing type for this operator
93+
CEnfdProp::EPropEnforcingType EpetOrder(
94+
CExpressionHandle &exprhdl, const CEnfdOrder *peo) const override;
95+
96+
// return true if operator passes through stats obtained from children,
97+
// this is used when computing stats during costing
98+
BOOL
99+
FPassThruStats() const override
100+
{
101+
return true;
102+
}
103+
104+
// print
105+
IOstream &OsPrint(IOstream &os) const override;
106+
107+
// conversion function
108+
static CPhysicalHashSequenceProject *
109+
PopConvert(COperator *pop)
110+
{
111+
GPOS_ASSERT(nullptr != pop);
112+
GPOS_ASSERT(EopPhysicalHashSequenceProject == pop->Eopid());
113+
114+
return dynamic_cast<CPhysicalHashSequenceProject *>(pop);
115+
}
116+
117+
}; // class CPhysicalHashSequenceProject
118+
119+
} // namespace gpopt
120+
121+
#endif // !GPOPT_CPhysicalHashSequenceProject_H
122+
123+
// EOF

0 commit comments

Comments
 (0)