Skip to content

Commit e67f85e

Browse files
committed
Sync with v5
1 parent c888168 commit e67f85e

File tree

2 files changed

+174
-59
lines changed

2 files changed

+174
-59
lines changed

src/jrd/RecordSourceNodes.cpp

Lines changed: 112 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const
5555
namespace
5656
{
5757
// Search through the list of ANDed booleans to find comparisons
58-
// referring streams of other select expressions.
58+
// referring to streams of parent select expressions.
5959
// Extract those booleans and return them to the caller.
6060

6161
bool findDependentBooleans(CompilerScratch* csb,
@@ -85,16 +85,19 @@ namespace
8585

8686
if (const auto cmpNode = nodeAs<ComparativeBoolNode>(boolean))
8787
{
88-
SortedStreamList streams;
89-
cmpNode->collectStreams(streams);
90-
91-
for (const auto stream : streams)
88+
if (cmpNode->blrOp == blr_eql || cmpNode->blrOp == blr_equiv)
9289
{
93-
if (!rseStreams.exist(stream))
90+
SortedStreamList streams;
91+
cmpNode->collectStreams(streams);
92+
93+
for (const auto stream : streams)
9494
{
95-
booleanStack.push(boolean);
96-
*parentBoolean = nullptr;
97-
return true;
95+
if (rseStreams.exist(stream))
96+
{
97+
booleanStack.push(boolean);
98+
*parentBoolean = nullptr;
99+
return true;
100+
}
98101
}
99102
}
100103
}
@@ -106,6 +109,7 @@ namespace
106109
// They are candidates to be converted into semi- or anti-joins.
107110

108111
bool findPossibleJoins(CompilerScratch* csb,
112+
const StreamList& rseStreams,
109113
BoolExprNode** parentBoolean,
110114
RecordSourceNodeStack& rseStack,
111115
BoolExprNodeStack& booleanStack)
@@ -115,10 +119,10 @@ namespace
115119
const auto binaryNode = nodeAs<BinaryBoolNode>(boolNode);
116120
if (binaryNode && binaryNode->blrOp == blr_and)
117121
{
118-
const bool found1 = findPossibleJoins(csb, binaryNode->arg1.getAddress(),
119-
rseStack, booleanStack);
120-
const bool found2 = findPossibleJoins(csb, binaryNode->arg2.getAddress(),
121-
rseStack, booleanStack);
122+
const bool found1 = findPossibleJoins(csb, rseStreams,
123+
binaryNode->arg1.getAddress(), rseStack, booleanStack);
124+
const bool found2 = findPossibleJoins(csb, rseStreams,
125+
binaryNode->arg2.getAddress(), rseStack, booleanStack);
122126

123127
if (!binaryNode->arg1 && !binaryNode->arg2)
124128
*parentBoolean = nullptr;
@@ -135,18 +139,23 @@ namespace
135139
if (rseNode && (rseNode->blrOp == blr_any || rseNode->blrOp == blr_ansi_any))
136140
{
137141
auto rse = rseNode->rse;
138-
fb_assert(rse);
142+
fb_assert(rse && (rse->flags & RseNode::FLAG_SUB_QUERY));
139143

140-
if (rse->rse_boolean)
144+
if (rse->rse_boolean && rse->rse_jointype == blr_inner &&
145+
!rse->rse_first && !rse->rse_skip && !rse->rse_plan)
141146
{
147+
// Find booleans convertible into semi-joins
148+
142149
StreamList streams;
143150
rse->computeRseStreams(streams);
144151

145152
BoolExprNodeStack booleans;
146-
if (findDependentBooleans(csb, streams,
153+
if (findDependentBooleans(csb, rseStreams,
147154
rse->rse_boolean.getAddress(),
148155
booleans))
149156
{
157+
// Compose the conjunct boolean
158+
150159
fb_assert(booleans.hasData());
151160
auto boolean = booleans.pop();
152161
while (booleans.hasData())
@@ -158,11 +167,45 @@ namespace
158167
boolean = andNode;
159168
}
160169

161-
rse->flags |= RseNode::FLAG_SEMI_JOINED;
162-
rseStack.push(rse);
163-
booleanStack.push(boolean);
164-
*parentBoolean = nullptr;
165-
return true;
170+
// Ensure that no external references are left inside the subquery.
171+
// If none are left, mark the RSE as semi-joined and add it to the stack.
172+
173+
SortedStreamList streams;
174+
rse->collectStreams(streams);
175+
176+
bool dependent = false;
177+
for (const auto stream : streams)
178+
{
179+
if (rseStreams.exist(stream))
180+
{
181+
dependent = true;
182+
break;
183+
}
184+
}
185+
186+
if (!dependent)
187+
{
188+
rse->flags &= ~RseNode::FLAG_SUB_QUERY;
189+
rse->flags |= RseNode::FLAG_SEMI_JOINED;
190+
rseStack.push(rse);
191+
booleanStack.push(boolean);
192+
*parentBoolean = nullptr;
193+
return true;
194+
}
195+
196+
// Otherwise, restore the original sub-query by adding
197+
// the collected booleans back to the RSE.
198+
199+
if (rse->rse_boolean)
200+
{
201+
const auto andNode = FB_NEW_POOL(csb->csb_pool)
202+
BinaryBoolNode(csb->csb_pool, blr_and);
203+
andNode->arg1 = boolean;
204+
andNode->arg2 = rse->rse_boolean;
205+
boolean = andNode;
206+
}
207+
208+
rse->rse_boolean = boolean;
166209
}
167210
}
168211
}
@@ -3419,21 +3462,51 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr
34193462

34203463
RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
34213464
{
3422-
if (rse_jointype != blr_inner || !rse_boolean)
3465+
if (rse_jointype != blr_inner || !rse_boolean || rse_plan)
34233466
return nullptr;
34243467

3468+
// If the sub-query is nested inside another sub-query which wasn't converted into a semi-join,
3469+
// it makes no sense to apply a semi-join at the deeper levels, as a sub-query is expected
3470+
// to be executed repeatedly.
3471+
// This is a temporary fix until nested loop semi-joins are allowed by the optimizer.
3472+
3473+
if (flags & FLAG_SUB_QUERY)
3474+
return nullptr;
3475+
3476+
for (const auto node : csb->csb_current_nodes)
3477+
{
3478+
if (const auto rse = nodeAs<RseNode>(node))
3479+
{
3480+
if (rse->flags & FLAG_SUB_QUERY)
3481+
return nullptr;
3482+
}
3483+
}
3484+
34253485
RecordSourceNodeStack rseStack;
34263486
BoolExprNodeStack booleanStack;
34273487

34283488
// Find possibly joinable sub-queries
34293489

3430-
if (!findPossibleJoins(csb, rse_boolean.getAddress(), rseStack, booleanStack))
3490+
StreamList rseStreams;
3491+
computeRseStreams(rseStreams);
3492+
3493+
if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), rseStack, booleanStack))
34313494
return nullptr;
34323495

34333496
fb_assert(rseStack.hasData() && booleanStack.hasData());
34343497
fb_assert(rseStack.getCount() == booleanStack.getCount());
34353498

3436-
// Create joins between the original node and detected joinable nodes
3499+
// Create joins between the original node and detected joinable nodes.
3500+
// Preserve FIRST/SKIP nodes at their original position, i.e. outside semi-joins.
3501+
3502+
const auto first = rse_first;
3503+
rse_first = nullptr;
3504+
3505+
const auto skip = rse_skip;
3506+
rse_skip = nullptr;
3507+
3508+
const auto orgFlags = flags;
3509+
flags = 0;
34373510

34383511
auto rse = this;
34393512
while (rseStack.hasData())
@@ -3450,6 +3523,21 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb)
34503523
rse = newRse;
34513524
}
34523525

3526+
if (first || skip)
3527+
{
3528+
const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool())
3529+
RseNode(*tdbb->getDefaultPool());
3530+
3531+
newRse->rse_relations.add(rse);
3532+
newRse->rse_jointype = blr_inner;
3533+
newRse->rse_first = first;
3534+
newRse->rse_skip = skip;
3535+
3536+
rse = newRse;
3537+
}
3538+
3539+
rse->flags = orgFlags;
3540+
34533541
return rse;
34543542
}
34553543

src/jrd/optimizer/Optimizer.cpp

Lines changed: 62 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -248,11 +248,15 @@ namespace
248248
for (const auto subRiver : rivers)
249249
{
250250
auto subRsb = subRiver->getRecordSource();
251+
251252
subRiver->activate(csb);
252253
if (subRiver != rivers.front())
253254
subRsb = opt->applyBoolean(subRsb, iter);
255+
254256
rsbs.add(subRsb);
255257
}
258+
259+
rivers.clear();
256260
}
257261

258262
m_rsb = FB_NEW_POOL(csb->csb_pool)
@@ -695,7 +699,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
695699
// AB: If we have limited our retrieval with FIRST / SKIP syntax then
696700
// we may not deliver above conditions (from higher rse's) to this
697701
// rse, because the results should be consistent.
698-
if (rse->rse_skip || rse->rse_first)
702+
if (rse->rse_skip || rse->rse_first || isSemiJoined())
699703
parentStack = nullptr;
700704

701705
// Set base-point before the parent/distributed nodes begin.
@@ -811,14 +815,15 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
811815
for (const auto rseStream : rseStreams)
812816
csb->csb_rpt[rseStream].deactivate();
813817

814-
// Find and collect booleans that are invariant in this context
815-
// (i.e. independent from streams in the RseNode). We can do that
816-
// easily because these streams are inactive at this point and
817-
// any node that references them will be not computable.
818+
// Find and collect booleans that are both deterministic and invariant
819+
// in this context (i.e. independent from streams in the RseNode).
820+
// We can check that easily because these streams are inactive at this point
821+
// and any node that references them will not be computable.
818822
// Note that we cannot do that for outer joins, as in this case boolean
819823
// represents a join condition which does not filter out the rows.
820824

821825
BoolExprNode* invariantBoolean = nullptr;
826+
822827
if (isInnerJoin())
823828
{
824829
for (auto iter = getBaseConjuncts(); iter.hasData(); ++iter)
@@ -835,8 +840,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
835840
// Go through the record selection expression generating
836841
// record source blocks for all streams
837842

838-
bool semiJoin = false;
839-
RiverList rivers, dependentRivers;
843+
RiverList rivers, dependentRivers, specialRivers;
840844

841845
bool innerSubStream = false;
842846
for (auto node : rse->rse_relations)
@@ -845,11 +849,9 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
845849
fb_assert(aggregate == rse->rse_aggregate);
846850

847851
const auto subRse = nodeAs<RseNode>(node);
848-
if (subRse && subRse->isSemiJoined())
849-
{
850-
fb_assert(rse->rse_jointype == blr_inner);
851-
semiJoin = true;
852-
}
852+
853+
const bool semiJoin = (subRse && subRse->isSemiJoined());
854+
fb_assert(!semiJoin || rse->rse_jointype == blr_inner);
853855

854856
// Find the stream number and place it at the end of the bedStreams array
855857
// (if this is really a stream and not another RseNode)
@@ -874,7 +876,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
874876
// AB: Save all outer-part streams
875877
if (isInnerJoin() || (isLeftJoin() && !innerSubStream))
876878
{
877-
if (!semiJoin && node->computable(csb, INVALID_STREAM, false))
879+
if (node->computable(csb, INVALID_STREAM, false))
878880
computable = true;
879881

880882
// Apply local booleans, if any. Note that it's done
@@ -889,7 +891,11 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
889891
if (computable)
890892
{
891893
outerStreams.join(localStreams);
892-
rivers.add(river);
894+
895+
if (semiJoin)
896+
specialRivers.add(river);
897+
else
898+
rivers.add(river);
893899
}
894900
else
895901
{
@@ -946,18 +952,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
946952
}
947953
else
948954
{
949-
// Compile the main streams before processing the semi-join itself
950-
if (semiJoin && compileStreams.hasData())
951-
{
952-
generateInnerJoin(compileStreams, rivers, &sort, rse->rse_plan);
953-
fb_assert(compileStreams.isEmpty());
954-
955-
// Ensure the main query river is stored before the semi-joined ones
956-
const auto river = rivers.pop();
957-
rivers.insert(0, river);
958-
}
959-
960-
const JoinType joinType = semiJoin ? SEMI_JOIN : INNER_JOIN;
955+
JoinType joinType = INNER_JOIN;
961956

962957
// AB: If previous rsb's are already on the stack we can't use
963958
// a navigational-retrieval for an ORDER BY because the next
@@ -1026,16 +1021,48 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack)
10261021
// Attempt to form joins in decreasing order of desirability
10271022
generateInnerJoin(joinStreams, rivers, &sort, rse->rse_plan);
10281023

1029-
// Re-activate remaining rivers to be hashable/mergeable
1030-
for (const auto river : rivers)
1031-
river->activate(csb);
1024+
if (rivers.isEmpty() && dependentRivers.isEmpty())
1025+
{
1026+
// This case may look weird, but it's possible for recursive unions
1027+
rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, 0, nullptr, joinType);
1028+
}
1029+
else
1030+
{
1031+
while (rivers.hasData() || dependentRivers.hasData())
1032+
{
1033+
// Re-activate remaining rivers to be hashable/mergeable
1034+
for (const auto river : rivers)
1035+
river->activate(csb);
1036+
1037+
// If there are multiple rivers, try some hashing or sort/merging
1038+
while (generateEquiJoin(rivers, joinType))
1039+
;
10321040

1033-
// If there are multiple rivers, try some hashing or sort/merging
1034-
while (generateEquiJoin(rivers, joinType))
1035-
;
1041+
if (dependentRivers.hasData())
1042+
{
1043+
fb_assert(joinType == INNER_JOIN);
10361044

1037-
rivers.join(dependentRivers);
1038-
rsb = CrossJoin(this, rivers, joinType).getRecordSource();
1045+
rivers.join(dependentRivers);
1046+
dependentRivers.clear();
1047+
}
1048+
1049+
const auto finalRiver = FB_NEW_POOL(getPool()) CrossJoin(this, rivers, joinType);
1050+
fb_assert(rivers.isEmpty());
1051+
rsb = finalRiver->getRecordSource();
1052+
1053+
if (specialRivers.hasData())
1054+
{
1055+
fb_assert(joinType == INNER_JOIN);
1056+
joinType = SEMI_JOIN;
1057+
1058+
rivers.add(finalRiver);
1059+
rivers.join(specialRivers);
1060+
specialRivers.clear();
1061+
}
1062+
}
1063+
}
1064+
1065+
fb_assert(rsb);
10391066

10401067
// Pick up any residual boolean that may have fallen thru the cracks
10411068
rsb = applyResidualBoolean(rsb);
@@ -2485,7 +2512,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType)
24852512
{
24862513
maxCardinality2 = maxCardinality1;
24872514
maxCardinality1 = cardinality;
2488-
maxCardinalityPosition = rivers.getCount();
2515+
maxCardinalityPosition = joinedRivers.getCount();
24892516
}
24902517
else if (cardinality > maxCardinality2)
24912518
maxCardinality2 = cardinality;

0 commit comments

Comments
 (0)