Skip to content

Commit ee56d5a

Browse files
committed
Added support for semi/anti and outer joins to hash join algorithm. Reimplemented support for semi/anti joins inside the nested loop algorithm. Slightly changed implementation of full outer joins. Added transformation of IN/EXISTS subqueries into lateral semi-joins. Basic optimizer support for semi-joins. More efficient optimization for cross joins. Added some debug info (hash table statistics) for hash joins.
1 parent 421a73a commit ee56d5a

File tree

9 files changed

+645
-162
lines changed

9 files changed

+645
-162
lines changed

src/jrd/RecordSourceNodes.cpp

Lines changed: 167 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,124 @@ static void genDeliverUnmapped(CompilerScratch* csb, const BoolExprNodeStack& pa
5252
static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const MetaName& name,
5353
ValueListNode* list, const FieldNode* flawedNode, const TEXT* side, dsql_ctx*& ctx);
5454

55+
namespace
56+
{
57+
// Search through the list of ANDed booleans to find comparisons
58+
// referring to streams of other select expressions.
59+
// Extract those booleans and return them to the caller.
60+
61+
// Walks the tree of AND-ed booleans rooted at *parentBoolean and extracts every
// comparison that references at least one stream not contained in rseStreams.
//
// csb           - only forwarded to the recursive calls; not used directly here
// rseStreams    - streams treated as local; a comparison touching any stream
//                 outside this list is considered dependent
// parentBoolean - address of the slot holding the boolean being examined; the slot
//                 is overwritten when the boolean (or one side of an AND) is removed
// booleanStack  - receives the extracted comparisons
//
// Returns true if at least one boolean was pushed onto booleanStack.
bool findDependentBooleans(CompilerScratch* csb,
62+
const StreamList& rseStreams,
63+
BoolExprNode** parentBoolean,
64+
BoolExprNodeStack& booleanStack)
65+
{
66+
const auto boolean = *parentBoolean;
67+
68+
const auto binaryNode = nodeAs<BinaryBoolNode>(boolean);
69+
if (binaryNode && binaryNode->blrOp == blr_and)
70+
{
71+
// Both operands are always visited (results are stored, not short-circuited),
// so an extraction from arg1 never prevents arg2 from being processed.
const bool found1 = findDependentBooleans(csb, rseStreams,
72+
binaryNode->arg1.getAddress(), booleanStack);
73+
const bool found2 = findDependentBooleans(csb, rseStreams,
74+
binaryNode->arg2.getAddress(), booleanStack);
75+
76+
// Collapse this AND node depending on which operands the recursive calls
// cleared through their slot address: drop it entirely, or replace it in
// the parent slot by the one operand that remains.
if (!binaryNode->arg1 && !binaryNode->arg2)
77+
*parentBoolean = nullptr;
78+
else if (!binaryNode->arg1)
79+
*parentBoolean = binaryNode->arg2;
80+
else if (!binaryNode->arg2)
81+
*parentBoolean = binaryNode->arg1;
82+
83+
return (found1 || found2);
84+
}
85+
86+
if (const auto cmpNode = nodeAs<ComparativeBoolNode>(boolean))
87+
{
88+
SortedStreamList streams;
89+
cmpNode->collectStreams(streams);
90+
91+
// A single stream missing from rseStreams is enough to treat
// the whole comparison as dependent.
for (const auto stream : streams)
92+
{
93+
if (!rseStreams.exist(stream))
94+
{
95+
// Hand the comparison to the caller and detach it from its parent slot
booleanStack.push(boolean);
96+
*parentBoolean = nullptr;
97+
return true;
98+
}
99+
}
100+
}
101+
102+
// Any other node kind, or a comparison using only streams from rseStreams,
// is left in place.
return false;
103+
}
104+
105+
// Search through the list of ANDed booleans to find correlated EXISTS/IN sub-queries.
106+
// They are candidates to be converted into semi- or anti-joins.
107+
108+
// Walks the tree of AND-ed booleans rooted at *parentBoolean looking for
// RseBoolNode's with blrOp blr_any or blr_ansi_any whose sub-query boolean
// contains comparisons referring to streams outside the sub-query.
//
// csb           - supplies the pool used for the AND nodes created here and is
//                 forwarded to findDependentBooleans
// parentBoolean - address of the slot holding the boolean being examined; set to
//                 nullptr when the sub-query node is taken out of the tree
// rseStack      - receives the RseNode of every accepted sub-query
// booleanStack  - receives, for every accepted sub-query, the conjunction of the
//                 booleans extracted from it (pushed together with the rseStack entry)
//
// Returns true if at least one sub-query was accepted.
bool findPossibleJoins(CompilerScratch* csb,
109+
BoolExprNode** parentBoolean,
110+
RecordSourceNodeStack& rseStack,
111+
BoolExprNodeStack& booleanStack)
112+
{
113+
auto boolNode = *parentBoolean;
114+
115+
const auto binaryNode = nodeAs<BinaryBoolNode>(boolNode);
116+
if (binaryNode && binaryNode->blrOp == blr_and)
117+
{
118+
// Both operands are always visited; the results are stored rather than
// combined with a short-circuiting expression.
const bool found1 = findPossibleJoins(csb, binaryNode->arg1.getAddress(),
119+
rseStack, booleanStack);
120+
const bool found2 = findPossibleJoins(csb, binaryNode->arg2.getAddress(),
121+
rseStack, booleanStack);
122+
123+
// Collapse this AND node depending on which operands were cleared by
// the recursive calls above.
if (!binaryNode->arg1 && !binaryNode->arg2)
124+
*parentBoolean = nullptr;
125+
else if (!binaryNode->arg1)
126+
*parentBoolean = binaryNode->arg2;
127+
else if (!binaryNode->arg2)
128+
*parentBoolean = binaryNode->arg1;
129+
130+
return (found1 || found2);
131+
}
132+
133+
const auto rseNode = nodeAs<RseBoolNode>(boolNode);
134+
// Both EXISTS (blr_any) and IN (blr_ansi_any) sub-queries are handled
135+
if (rseNode && (rseNode->blrOp == blr_any || rseNode->blrOp == blr_ansi_any))
136+
{
137+
auto rse = rseNode->rse;
138+
fb_assert(rse);
139+
140+
// A sub-query without a boolean has nothing to extract and is left untouched
if (rse->rse_boolean)
141+
{
142+
StreamList streams;
143+
rse->computeRseStreams(streams);
144+
145+
// Extract the comparisons that reference streams other than those
// reported by computeRseStreams(); this edits rse->rse_boolean in place.
BoolExprNodeStack booleans;
146+
if (findDependentBooleans(csb, streams,
147+
rse->rse_boolean.getAddress(),
148+
booleans))
149+
{
150+
fb_assert(booleans.hasData());
151+
// Fold all extracted comparisons into a single boolean by chaining
// them with newly allocated AND nodes.
auto boolean = booleans.pop();
152+
while (booleans.hasData())
153+
{
154+
const auto andNode = FB_NEW_POOL(csb->csb_pool)
155+
BinaryBoolNode(csb->csb_pool, blr_and);
156+
andNode->arg1 = boolean;
157+
andNode->arg2 = booleans.pop();
158+
boolean = andNode;
159+
}
160+
161+
// Mark the sub-query, report it together with its combined boolean,
// and remove the sub-query node from the parent's boolean tree.
rse->flags |= RseNode::FLAG_SEMI_JOINED;
162+
rseStack.push(rse);
163+
booleanStack.push(boolean);
164+
*parentBoolean = nullptr;
165+
return true;
166+
}
167+
}
168+
}
169+
170+
return false;
171+
}
172+
}
55173

56174
//--------------------
57175

@@ -2783,6 +2901,9 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb)
27832901
{
27842902
SET_TDBB(tdbb);
27852903

2904+
if (const auto newRse = processPossibleJoins(tdbb, csb))
2905+
return newRse->pass1(tdbb, csb);
2906+
27862907
// for scoping purposes, maintain a stack of RseNode's which are
27872908
// currently being parsed; if there are none on the stack as
27882909
// yet, mark the RseNode as variant to make sure that statement-
@@ -2888,6 +3009,12 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb)
28883009
void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
28893010
BoolExprNode** boolean, RecordSourceNodeStack& stack)
28903011
{
3012+
if (const auto newRse = processPossibleJoins(tdbb, csb))
3013+
{
3014+
newRse->pass1Source(tdbb, csb, rse, boolean, stack);
3015+
return;
3016+
}
3017+
28913018
if (rse_jointype != blr_inner)
28923019
{
28933020
// Check whether any of the upper level booleans (those belonging to the WHERE clause)
@@ -2941,15 +3068,15 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse,
29413068
}
29423069
}
29433070

2944-
// in the case of an RseNode, it is possible that a new RseNode will be generated,
3071+
// In the case of an RseNode, it is possible that a new RseNode will be generated,
29453072
// so wait to process the source before we push it on the stack (bug 8039)
29463073

29473074
// The addition of the JOIN syntax for specifying inner joins causes an
29483075
// RseNode tree to be generated, which is undesirable in the simplest case
29493076
// where we are just trying to inner join more than 2 streams. If possible,
29503077
// try to flatten the tree out before we go any further.
29513078

2952-
if (!isLateral() &&
3079+
if (!isLateral() && !isSemiJoined() &&
29533080
rse->rse_jointype == blr_inner &&
29543081
rse_jointype == blr_inner &&
29553082
!rse_sorted && !rse_projection &&
@@ -3054,11 +3181,11 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr
30543181

30553182
StreamStateHolder stateHolder(csb, opt->getOuterStreams());
30563183

3057-
if (opt->isLeftJoin() || isLateral())
3184+
if (opt->isLeftJoin() || isLateral() || isSemiJoined())
30583185
{
30593186
stateHolder.activate();
30603187

3061-
if (opt->isLeftJoin())
3188+
if (opt->isLeftJoin() || isSemiJoined())
30623189
{
30633190
// Push all conjuncts except "missing" ones (e.g. IS NULL)
30643191
for (auto iter = opt->getConjuncts(false, true); iter.hasData(); ++iter)
@@ -3081,6 +3208,42 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr
30813208
return opt->compile(this, &conjunctStack);
30823209
}
30833210

3211+
// Looks for sub-queries in this node's boolean that findPossibleJoins() accepts
// and, if any are found, wraps this node into a chain of new inner-join RseNode's,
// one per accepted sub-query.
//
// tdbb - its default pool is used to allocate the new RseNode's
// csb  - forwarded to findPossibleJoins()
//
// Returns the outermost newly created RseNode, or nullptr when this node is not
// an inner join, has no boolean, or no sub-query was accepted.
// Note that findPossibleJoins() receives the address of rse_boolean and may
// therefore modify it.
RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
3212+
{
3213+
if (rse_jointype != blr_inner || !rse_boolean)
3214+
return nullptr;
3215+
3216+
RecordSourceNodeStack rseStack;
3217+
BoolExprNodeStack booleanStack;
3218+
3219+
// Find possibly joinable sub-queries
3220+
3221+
if (!findPossibleJoins(csb, rse_boolean.getAddress(), rseStack, booleanStack))
3222+
return nullptr;
3223+
3224+
// The two stacks are filled in pairs: one sub-query and one boolean per entry
fb_assert(rseStack.hasData() && booleanStack.hasData());
3225+
fb_assert(rseStack.getCount() == booleanStack.getCount());
3226+
3227+
// Create joins between the original node and detected joinable nodes
3228+
3229+
auto rse = this;
3230+
while (rseStack.hasData())
3231+
{
3232+
const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool())
3233+
RseNode(*tdbb->getDefaultPool());
3234+
3235+
// First relation is the node built so far, second is the sub-query
newRse->rse_relations.add(rse);
3236+
newRse->rse_relations.add(rseStack.pop());
3237+
3238+
// The boolean popped here is the one pushed together with the sub-query above
newRse->rse_jointype = blr_inner;
3239+
newRse->rse_boolean = booleanStack.pop();
3240+
3241+
// The next sub-query (if any) is joined on top of the node just created
rse = newRse;
3242+
}
3243+
3244+
return rse;
3245+
}
3246+
30843247
// Check that all streams in the RseNode have a plan specified for them.
30853248
// If they are not, there are streams in the RseNode which were not mentioned in the plan.
30863249
void RseNode::planCheck(const CompilerScratch* csb) const

src/jrd/RecordSourceNodes.h

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -719,15 +719,16 @@ class RseNode final : public TypedNode<RecordSourceNode, RecordSourceNode::TYPE_
719719
public:
720720
// Bit values combined in the node's `flags` member and tested by accessors
// such as isSemiJoined() and hasWriteLock(). Each enumerator uses a distinct bit.
enum : USHORT
721721
{
722-
FLAG_VARIANT = 0x01, // variant (not invariant?)
723-
FLAG_SINGULAR = 0x02, // singleton select
724-
FLAG_WRITELOCK = 0x04, // locked for write
725-
FLAG_SCROLLABLE = 0x08, // scrollable cursor
726-
FLAG_DSQL_COMPARATIVE = 0x10, // transformed from DSQL ComparativeBoolNode
727-
FLAG_OPT_FIRST_ROWS = 0x20, // optimize retrieval for first rows
728-
FLAG_LATERAL = 0x40, // lateral derived table
729-
FLAG_SKIP_LOCKED = 0x80, // skip locked
730-
FLAG_SUB_QUERY = 0x100 // sub-query
722+
FLAG_VARIANT = 0x01, // variant (not invariant?)
723+
FLAG_SINGULAR = 0x02, // singleton select
724+
FLAG_WRITELOCK = 0x04, // locked for write
725+
FLAG_SCROLLABLE = 0x08, // scrollable cursor
726+
FLAG_DSQL_COMPARATIVE = 0x10, // transformed from DSQL ComparativeBoolNode
727+
FLAG_OPT_FIRST_ROWS = 0x20, // optimize retrieval for first rows
728+
FLAG_LATERAL = 0x40, // lateral derived table
729+
FLAG_SKIP_LOCKED = 0x80, // skip locked
730+
FLAG_SUB_QUERY = 0x100, // sub-query
731+
FLAG_SEMI_JOINED = 0x200 // participates in semi-join
731732
};
732733

733734
bool isInvariant() const
@@ -755,6 +756,11 @@ class RseNode final : public TypedNode<RecordSourceNode, RecordSourceNode::TYPE_
755756
return (flags & FLAG_SUB_QUERY) != 0;
756757
}
757758

759+
// Returns true when the FLAG_SEMI_JOINED bit is set in flags.
bool isSemiJoined() const
760+
{
761+
return (flags & FLAG_SEMI_JOINED) != 0;
762+
}
763+
758764
bool hasWriteLock() const
759765
{
760766
return (flags & FLAG_WRITELOCK) != 0;
@@ -875,6 +881,7 @@ class RseNode final : public TypedNode<RecordSourceNode, RecordSourceNode::TYPE_
875881
private:
876882
void planCheck(const CompilerScratch* csb) const;
877883
static void planSet(CompilerScratch* csb, PlanNode* plan);
884+
RseNode* processPossibleJoins(thread_db* tdbb, CompilerScratch* csb);
878885

879886
public:
880887
NestConst<ValueExprNode> dsqlFirst;

src/jrd/optimizer/InnerJoin.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ void InnerJoin::calculateStreamInfo()
108108
innerStream->baseIndexes = candidate->indexes;
109109
innerStream->baseUnique = candidate->unique;
110110
innerStream->baseNavigated = candidate->navigated;
111+
innerStream->baseMatches = candidate->matches;
112+
innerStream->baseDependentFromStreams = candidate->dependentFromStreams;
111113

112114
csb->csb_rpt[innerStream->number].deactivate();
113115
}
@@ -573,13 +575,36 @@ River* InnerJoin::formRiver()
573575

574576
// Create a hash join
575577
rsb = FB_NEW_POOL(getPool())
576-
HashJoin(tdbb, csb, 2, hashJoinRsbs, keys.begin(), stream.selectivity);
578+
HashJoin(tdbb, csb, INNER_JOIN, 2, hashJoinRsbs, keys.begin(), stream.selectivity);
577579

578580
// Clear priorly processed rsb's, as they're already incorporated into a hash join
579581
rsbs.clear();
580582
}
581583
else
584+
{
585+
StreamList depStreams;
586+
587+
if (optimizer->isSemiJoined() && rsbs.isEmpty())
588+
{
589+
const auto baseStream = getStreamInfo(stream.number);
590+
for (const auto match : baseStream->baseMatches)
591+
{
592+
if (optimizer->checkEquiJoin(match))
593+
{
594+
for (const auto depStream : baseStream->baseDependentFromStreams)
595+
{
596+
if (match->containsStream(depStream))
597+
depStreams.add(depStream);
598+
}
599+
}
600+
}
601+
}
602+
603+
StreamStateHolder stateHolder(csb, depStreams);
604+
stateHolder.deactivate();
605+
582606
rsb = optimizer->generateRetrieval(stream.number, sortPtr, false, false);
607+
}
583608

584609
rsbs.add(rsb);
585610
streams.add(stream.number);

0 commit comments

Comments
 (0)