diff --git a/src/jrd/RecordSourceNodes.cpp b/src/jrd/RecordSourceNodes.cpp index 235727b0b4c..b508333bcb0 100644 --- a/src/jrd/RecordSourceNodes.cpp +++ b/src/jrd/RecordSourceNodes.cpp @@ -55,7 +55,16 @@ static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const namespace { - void appendContextAlias(DsqlCompilerScratch* dsqlScratch, const string& alias) + struct SpecialJoinItem + { + RseNode* rse; + bool semiJoin; + BoolExprNode* boolean; + }; + + typedef HalfStaticArray SpecialJoinList; + + void appendContextAlias(DsqlCompilerScratch* dsqlScratch, const string& alias) { const auto len = alias.length(); if (len <= MAX_UCHAR) @@ -126,8 +135,7 @@ namespace bool findPossibleJoins(CompilerScratch* csb, const StreamList& rseStreams, BoolExprNode** parentBoolean, - RecordSourceNodeStack& rseStack, - BoolExprNodeStack& booleanStack) + SpecialJoinList& result) { auto boolNode = *parentBoolean; @@ -135,9 +143,9 @@ namespace if (binaryNode && binaryNode->blrOp == blr_and) { const bool found1 = findPossibleJoins(csb, rseStreams, - binaryNode->arg1.getAddress(), rseStack, booleanStack); + binaryNode->arg1.getAddress(), result); const bool found2 = findPossibleJoins(csb, rseStreams, - binaryNode->arg2.getAddress(), rseStack, booleanStack); + binaryNode->arg2.getAddress(), result); if (!binaryNode->arg1 && !binaryNode->arg2) *parentBoolean = nullptr; @@ -156,7 +164,7 @@ namespace auto rse = rseNode->rse; fb_assert(rse && (rse->flags & RseNode::FLAG_SUB_QUERY)); - if (rse->rse_boolean && rse->rse_jointype == blr_inner && + if (rse->rse_boolean && rse->isInnerJoin() && !rse->rse_first && !rse->rse_skip && !rse->rse_plan) { // Find booleans convertable into semi-joins @@ -201,9 +209,7 @@ namespace if (!dependent) { rse->flags &= ~RseNode::FLAG_SUB_QUERY; - rse->flags |= RseNode::FLAG_SEMI_JOINED; - rseStack.push(rse); - booleanStack.push(boolean); + result.push({rse, true, boolean}); *parentBoolean = nullptr; return true; } @@ -1019,7 +1025,7 @@ void RelationSourceNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseN // 1) If the view has a projection, sort, first/skip or explicit plan. // 2) If it's part of an outer join. - if (rse->rse_jointype != blr_inner || // viewRse->rse_jointype != blr_inner || ??? + if (!rse->isInnerJoin() || // !viewRse->isInnerJoin() || ??? viewRse->rse_sorted || viewRse->rse_projection || viewRse->rse_first || viewRse->rse_skip || viewRse->rse_plan) { @@ -2970,19 +2976,19 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) switch (rse_jointype) { - case blr_inner: + case INNER_JOIN: streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]); break; - case blr_left: + case LEFT_JOIN: streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); ++dsqlScratch->inOuterJoin; streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]); --dsqlScratch->inOuterJoin; break; - case blr_right: + case RIGHT_JOIN: ++dsqlScratch->inOuterJoin; streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); --dsqlScratch->inOuterJoin; @@ -2993,7 +2999,7 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]); break; - case blr_full: + case FULL_JOIN: ++dsqlScratch->inOuterJoin; streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); // Temporarily remove just created context(s) from the stack, @@ -3065,7 +3071,7 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) if (matched->items.isEmpty()) { // There is no match. Transform to CROSS JOIN. - node->rse_jointype = blr_inner; + node->rse_jointype = INNER_JOIN; usingList = NULL; delete matched; @@ -3280,14 +3286,14 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb) ValueExprNode* skip = rse_skip; PlanNode* plan = rse_plan; - if (rse_jointype == blr_inner) + if (isInnerJoin()) csb->csb_inner_booleans.push(rse_boolean); // zip thru RseNode expanding views and inner joins for (auto sub : rse_relations) processSource(tdbb, csb, this, sub, &boolean, stack); - if (rse_jointype == blr_inner) + if (isInnerJoin()) csb->csb_inner_booleans.pop(); // Now, rebuild the RseNode block. @@ -3362,7 +3368,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, return; } - if (rse_jointype != blr_inner) + if (isOuterJoin()) { // Check whether any of the upper level booleans (those belonging to the WHERE clause) // is able to filter out rows from the "inner" streams. If this is the case, @@ -3377,7 +3383,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, StreamList streams; // First check the left stream of the full outer join - if (rse_jointype == blr_full) + if (isFullJoin()) { rse1->computeRseStreams(streams); @@ -3385,7 +3391,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, { if (boolean && boolean->ignoreNulls(streams)) { - rse_jointype = blr_left; + rse_jointype = LEFT_JOIN; break; } } @@ -3399,16 +3405,16 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, { if (boolean && boolean->ignoreNulls(streams)) { - if (rse_jointype == blr_full) + if (isFullJoin()) { // We should transform FULL join to RIGHT join, // but as we don't allow them inside the engine // just swap the sides and insist it's LEFT join std::swap(rse_relations[0], rse_relations[1]); - rse_jointype = blr_left; + rse_jointype = LEFT_JOIN; } else - rse_jointype = blr_inner; + rse_jointype = INNER_JOIN; break; } @@ -3423,11 +3429,9 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, // where we are just trying to inner join more than 2 streams. If possible, // try to flatten the tree out before we go any further. - if (!isLateral() && !isSemiJoined() && - rse->rse_jointype == blr_inner && - rse_jointype == blr_inner && - !rse_sorted && !rse_projection && - !rse_first && !rse_skip && !rse_plan) + if (!isLateral() && + rse->isInnerJoin() && isInnerJoin() && + !rse_sorted && !rse_projection && !rse_first && !rse_skip && !rse_plan) { for (auto sub : rse_relations) processSource(tdbb, csb, rse, sub, boolean, stack); @@ -3518,10 +3522,11 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr computeRseStreams(rseStreams); BoolExprNodeStack conjunctStack; + StreamStateHolder stateHolder(csb, opt->getOuterStreams()); - // pass RseNode boolean only to inner substreams because join condition + // Pass RseNode boolean only to inner substreams because join condition // should never exclude records from outer substreams - if (opt->isInnerJoin() || (opt->isLeftJoin() && innerSubStream)) + if (opt->isInnerJoin() || ((opt->isLeftJoin() || opt->isSpecialJoin()) && innerSubStream)) { // AB: For an (X LEFT JOIN Y) mark the outer-streams (X) as // active because the inner-streams (Y) are always "dependent" @@ -3529,39 +3534,27 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr // // dimitr: the same for lateral derived tables in inner joins - StreamStateHolder stateHolder(csb, opt->getOuterStreams()); - - if (opt->isLeftJoin() || isLateral() || isSemiJoined()) - { + if (!opt->isInnerJoin() || isLateral()) stateHolder.activate(); - if (opt->isLeftJoin() || isSemiJoined()) - { - // Push all conjuncts except "missing" ones (e.g. IS NULL) - for (auto iter = opt->getConjuncts(false, true); iter.hasData(); ++iter) - { - if (iter->containsAnyStream(rseStreams)) - conjunctStack.push(iter); - } - } - } - else + // For the LEFT JOIN, push all conjuncts except "missing" ones (e.g. IS NULL) + for (auto iter = opt->getConjuncts(false, opt->isLeftJoin()); iter.hasData(); ++iter) { - for (auto iter = opt->getConjuncts(); iter.hasData(); ++iter) - { - if (iter->containsAnyStream(rseStreams)) - conjunctStack.push(iter); - } + if (iter->containsAnyStream(rseStreams)) + conjunctStack.push(iter); } - return opt->compile(this, &conjunctStack); + if (opt->isSpecialJoin() && !opt->deliverJoinConjuncts(conjunctStack)) + conjunctStack.clear(); } - - // Push only parent conjuncts to the outer stream - for (auto iter = opt->getConjuncts(true, false); iter.hasData(); ++iter) + else { - if (iter->containsAnyStream(rseStreams)) - conjunctStack.push(iter); + // Push only parent conjuncts to the outer stream + for (auto iter = opt->getConjuncts(true, false); iter.hasData(); ++iter) + { + if (iter->containsAnyStream(rseStreams)) + conjunctStack.push(iter); + } } return opt->compile(this, &conjunctStack); @@ -3569,7 +3562,7 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) { - if (rse_jointype != blr_inner || !rse_boolean || rse_plan) + if (!isInnerJoin() || !rse_boolean || rse_plan) return nullptr; // If the sub-query is nested inside the other sub-query which wasn't converted into semi-join, @@ -3589,19 +3582,16 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) } } - RecordSourceNodeStack rseStack; - BoolExprNodeStack booleanStack; - // Find possibly joinable sub-queries StreamList rseStreams; computeRseStreams(rseStreams); + SpecialJoinList specialJoins; - if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), rseStack, booleanStack)) + if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), specialJoins)) return nullptr; - fb_assert(rseStack.hasData() && booleanStack.hasData()); - fb_assert(rseStack.getCount() == booleanStack.getCount()); + fb_assert(specialJoins.hasData()); // Create joins between the original node and detected joinable nodes. // Preserve FIRST/SKIP nodes at their original position, i.e. outside semi-joins. @@ -3616,16 +3606,18 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) flags = 0; auto rse = this; - while (rseStack.hasData()) + while (specialJoins.hasData()) { const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool()) RseNode(*tdbb->getDefaultPool()); + const auto item = specialJoins.pop(); + newRse->rse_relations.add(rse); - newRse->rse_relations.add(rseStack.pop()); + newRse->rse_relations.add(item.rse); - newRse->rse_jointype = blr_inner; - newRse->rse_boolean = booleanStack.pop(); + newRse->rse_jointype = item.semiJoin ? SEMI_JOIN : ANTI_JOIN; + newRse->rse_boolean = item.boolean; rse = newRse; } @@ -3636,7 +3628,7 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) RseNode(*tdbb->getDefaultPool()); newRse->rse_relations.add(rse); - newRse->rse_jointype = blr_inner; + newRse->rse_jointype = INNER_JOIN; newRse->rse_first = first; newRse->rse_skip = skip; diff --git a/src/jrd/RecordSourceNodes.h b/src/jrd/RecordSourceNodes.h index 29e0c3bc698..e26105be6a5 100644 --- a/src/jrd/RecordSourceNodes.h +++ b/src/jrd/RecordSourceNodes.h @@ -715,6 +715,16 @@ class WindowSourceNode final : public TypedNode { + enum : UCHAR // storage is BLR-compatible + { + INNER_JOIN = blr_inner, + LEFT_JOIN = blr_left, + RIGHT_JOIN = blr_right, + FULL_JOIN = blr_full, + SEMI_JOIN, + ANTI_JOIN + }; + public: enum : USHORT { @@ -725,10 +735,40 @@ class RseNode final : public TypedNode rse_invariants; // Invariant nodes bound to top-level RSE Firebird::Array > rse_relations; USHORT flags = 0; - USHORT rse_jointype = blr_inner; // inner, left, full - Firebird::TriState firstRows; // optimize for first rows + UCHAR rse_jointype = INNER_JOIN; + Firebird::TriState firstRows; // optimize for first rows }; class SelectExprNode final : public TypedNode diff --git a/src/jrd/optimizer/InnerJoin.cpp b/src/jrd/optimizer/InnerJoin.cpp index a6b1a364f2e..c356fa5e898 100644 --- a/src/jrd/optimizer/InnerJoin.cpp +++ b/src/jrd/optimizer/InnerJoin.cpp @@ -535,7 +535,7 @@ River* InnerJoin::formRiver() // Create a nested loop join from the priorly processed streams const auto priorRsb = (rsbs.getCount() == 1) ? rsbs[0] : - FB_NEW_POOL(getPool()) NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin()); + FB_NEW_POOL(getPool()) NestedLoopJoin(csb, JoinType::INNER, rsbs.getCount(), rsbs.begin()); // Prepare record sources and corresponding equivalence keys for hash-joining RecordSource* hashJoinRsbs[] = {priorRsb, rsb}; @@ -580,7 +580,7 @@ River* InnerJoin::formRiver() // Create a hash join rsb = FB_NEW_POOL(getPool()) - HashJoin(tdbb, csb, INNER_JOIN, 2, hashJoinRsbs, keys.begin(), stream.selectivity); + HashJoin(tdbb, csb, JoinType::INNER, 2, hashJoinRsbs, keys.begin(), stream.selectivity); // Clear priorly processed rsb's, as they're already incorporated into a hash join rsbs.clear(); @@ -597,7 +597,7 @@ River* InnerJoin::formRiver() // Create a nested loop join from the processed streams rsb = (rsbs.getCount() == 1) ? rsbs[0] : - FB_NEW_POOL(getPool()) NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin()); + FB_NEW_POOL(getPool()) NestedLoopJoin(csb, JoinType::INNER, rsbs.getCount(), rsbs.begin()); // Ensure matching booleans are rechecked early if (equiMatches.hasData()) diff --git a/src/jrd/optimizer/Optimizer.cpp b/src/jrd/optimizer/Optimizer.cpp index 6eeb614c4f0..8ab77f2373f 100644 --- a/src/jrd/optimizer/Optimizer.cpp +++ b/src/jrd/optimizer/Optimizer.cpp @@ -171,7 +171,7 @@ namespace CrossJoin(Optimizer* opt, RiverList& rivers, JoinType joinType) : River(opt->getCompilerScratch(), nullptr, rivers) { - fb_assert(joinType != OUTER_JOIN); + fb_assert(joinType != JoinType::OUTER); const auto csb = opt->getCompilerScratch(); auto iter = opt->getBaseConjuncts(); @@ -196,7 +196,7 @@ namespace { HalfStaticArray rsbs(riverCount); - if (joinType == INNER_JOIN) + if (joinType == JoinType::INNER) { // Reorder input rivers according to their possible inter-dependencies @@ -260,7 +260,7 @@ namespace } m_rsb = FB_NEW_POOL(csb->csb_pool) - NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin(), joinType); + NestedLoopJoin(csb, joinType, rsbs.getCount(), rsbs.begin()); } } }; @@ -591,11 +591,10 @@ namespace // Optimizer::Optimizer(thread_db* aTdbb, CompilerScratch* aCsb, RseNode* aRse, - bool parentFirstRows, double parentCardinality) + bool parentFirstRows) : PermanentStorage(*aTdbb->getDefaultPool()), tdbb(aTdbb), csb(aCsb), rse(aRse), firstRows(rse->firstRows.valueOr(parentFirstRows)), - cardinality(parentCardinality), compileStreams(getPool()), bedStreams(getPool()), keyStreams(getPool()), @@ -651,7 +650,7 @@ RecordSource* Optimizer::compile(RseNode* subRse, BoolExprNodeStack* parentStack // if we're going to sort/aggregate the resultset afterwards const bool subFirstRows = firstRows && !rse->rse_sorted && !rse->rse_aggregate; - Optimizer subOpt(tdbb, csb, subRse, subFirstRows, cardinality); + Optimizer subOpt(tdbb, csb, subRse, subFirstRows); const auto rsb = subOpt.compile(parentStack); if (parentStack && subOpt.isInnerJoin()) @@ -702,33 +701,11 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) conjunctCount += distributeEqualities(conjunctStack, conjunctCount); - if (parentStack) - { - // AB: If we have limit our retrieval with FIRST / SKIP syntax then - // we may not deliver above conditions (from higher rse's) to this - // rse, because the results should be consistent. - if (rse->rse_skip || rse->rse_first) - parentStack = nullptr; - - if (isSemiJoined()) - { - fb_assert(parentStack->hasData()); - - // We have a semi-join, look at the parent (priorly joined streams) cardinality. - // If it's known to be not very small, nullify the parent conjuncts - // to give up a possible nested loop join in favor of a hash join. - // Here we assume every equi-join condition having a default selectivity (0.1). - // TODO: replace with a proper cost-based decision in the future. - - double subSelectivity = MAXIMUM_SELECTIVITY; - for (auto count = parentStack->getCount(); count; count--) - subSelectivity *= DEFAULT_SELECTIVITY; - const auto thresholdCardinality = MINIMUM_CARDINALITY / subSelectivity; - - if (!cardinality || cardinality > thresholdCardinality) - parentStack = nullptr; - } - } + // AB: If we have limit our retrieval with FIRST / SKIP syntax then + // we may not deliver above conditions (from higher rse's) to this + // rse, because the results should be consistent. + if (rse->rse_skip || rse->rse_first) + parentStack = nullptr; // Set base-point before the parent/distributed nodes begin. const unsigned baseCount = conjunctCount; @@ -758,7 +735,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) { const auto node = iter.object(); - if (!isInnerJoin() && node->possiblyUnknown()) + if (isOuterJoin() && node->possiblyUnknown()) { // parent missing conjunctions shouldn't be // distributed to FULL OUTER JOIN streams at all @@ -870,20 +847,19 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // record source blocks for all streams RiverList rivers, dependentRivers; - HalfStaticArray specialSubQueries; + RseNode* specialRse = nullptr; bool innerSubStream = false; for (auto node : rse->rse_relations) { fb_assert(sort == rse->rse_sorted); fb_assert(aggregate == rse->rse_aggregate); + fb_assert(!specialRse); - const auto subRse = nodeAs(node); - - if (subRse && subRse->isSemiJoined()) + if (isSpecialJoin() && innerSubStream) { - fb_assert(rse->rse_jointype == blr_inner); - specialSubQueries.add(subRse); + specialRse = nodeAs(node); + fb_assert(specialRse); continue; } @@ -908,14 +884,14 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) bool computable = false; // AB: Save all outer-part streams - if (isInnerJoin() || (isLeftJoin() && !innerSubStream)) + if (isInnerJoin() || ((isLeftJoin() || isSpecialJoin()) && !innerSubStream)) { if (node->computable(csb, INVALID_STREAM, false)) computable = true; // Apply local booleans, if any. Note that it's done // only for inner joins and outer streams of left joins. - auto iter = getConjuncts(!isInnerJoin(), false); + auto iter = getConjuncts(isLeftJoin(), false); rsb = applyLocalBoolean(rsb, localStreams, iter); } @@ -974,15 +950,14 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) } // Outer joins are processed their own way - if (!isInnerJoin()) + if (rse->isOuterJoin()) { rivers.join(dependentRivers); + dependentRivers.clear(); rsb = OuterJoin(tdbb, this, rse, rivers, &sort).generate(); } else { - JoinType joinType = INNER_JOIN; - // AB: If previous rsb's are already on the stack we can't use // a navigational-retrieval for an ORDER BY because the next // streams are JOINed to the previous ones @@ -993,57 +968,60 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // AB: We could already have multiple rivers at this // point so try to do some hashing or sort/merging now. - while (generateEquiJoin(rivers, joinType)) + while (generateEquiJoin(rivers, JoinType::INNER)) ; } StreamList joinStreams(compileStreams); - fb_assert(joinStreams.getCount() != 1 || csb->csb_rpt[joinStreams[0]].csb_relation); - - while (true) + if (isInnerJoin()) { - // AB: Determine which streams have an index relationship - // with the currently active rivers. This is needed so that - // no merge is made between a new cross river and the - // currently active rivers. Where in the new cross river - // a stream depends (index) on the active rivers. - StreamList dependentStreams, freeStreams; - findDependentStreams(joinStreams, dependentStreams, freeStreams); + fb_assert(joinStreams.getCount() != 1 || csb->csb_rpt[joinStreams[0]].csb_relation); - // If we have dependent and free streams then we can't rely on - // the sort node to be used for index navigation - if (dependentStreams.hasData() && freeStreams.hasData()) + while (true) { - sort = nullptr; - sortCanBeUsed = false; - } + // AB: Determine which streams have an index relationship + // with the currently active rivers. This is needed so that + // no merge is made between a new cross river and the + // currently active rivers. Where in the new cross river + // a stream depends (index) on the active rivers. + StreamList dependentStreams, freeStreams; + findDependentStreams(joinStreams, dependentStreams, freeStreams); + + // If we have dependent and free streams then we can't rely on + // the sort node to be used for index navigation + if (dependentStreams.hasData() && freeStreams.hasData()) + { + sort = nullptr; + sortCanBeUsed = false; + } - if (dependentStreams.hasData()) - { - // Copy free streams - joinStreams.assign(freeStreams); + if (dependentStreams.hasData()) + { + // Copy free streams + joinStreams.assign(freeStreams); - // Make rivers from the dependent streams - generateInnerJoin(dependentStreams, rivers, &sort, rse->rse_plan); + // Make rivers from the dependent streams + generateInnerJoin(dependentStreams, rivers, &sort, rse->rse_plan); - // Generate one river which holds a cross join rsb between - // all currently available rivers + // Generate one river which holds a cross join rsb between + // all currently available rivers - rivers.add(FB_NEW_POOL(getPool()) CrossJoin(this, rivers, joinType)); - rivers.back()->activate(csb); - } - else - { - if (freeStreams.hasData()) - { - // Deactivate streams from rivers on stack, because - // the remaining streams don't have any indexed relationship with them - for (const auto river : rivers) - river->deactivate(csb); + rivers.add(FB_NEW_POOL(getPool()) CrossJoin(this, rivers, JoinType::INNER)); + rivers.back()->activate(csb); } + else + { + if (freeStreams.hasData()) + { + // Deactivate streams from rivers on stack, because + // the remaining streams don't have any indexed relationship with them + for (const auto river : rivers) + river->deactivate(csb); + } - break; + break; + } } } @@ -1053,10 +1031,12 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) if (rivers.isEmpty() && dependentRivers.isEmpty()) { // This case may look weird, but it's possible for recursive unions - rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, 0, nullptr, joinType); + rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, JoinType::INNER, 0, nullptr); } else { + auto joinType = JoinType::INNER; + while (rivers.hasData() || dependentRivers.hasData()) { // Re-activate remaining rivers to be hashable/mergeable @@ -1078,27 +1058,24 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) rsb = finalRiver->getRecordSource(); cardinality = rsb->getCardinality(); - if (specialSubQueries.hasData()) + if (specialRse) { - fb_assert(joinType == INNER_JOIN); - joinType = SEMI_JOIN; + fb_assert(joinType == JoinType::INNER); + joinType = rse->isSemiJoin() ? JoinType::SEMI : JoinType::ANTI; rivers.add(finalRiver); - for (const auto rse : specialSubQueries) - { - const auto sub = rse->compile(tdbb, this, true); - fb_assert(sub); + const auto sub = specialRse->compile(tdbb, this, true); + fb_assert(sub); - StreamList localStreams; - sub->findUsedStreams(localStreams); + StreamList localStreams; + sub->findUsedStreams(localStreams); - const auto subRiver = FB_NEW_POOL(getPool()) River(csb, sub, rse, localStreams); - auto& list = subRiver->isDependent(*finalRiver) ? dependentRivers : rivers; - list.add(subRiver); - } + const auto subRiver = FB_NEW_POOL(getPool()) River(csb, sub, specialRse, localStreams); + auto& list = subRiver->isDependent(*finalRiver) ? dependentRivers : rivers; + list.add(subRiver); - specialSubQueries.clear(); + specialRse = nullptr; } } } @@ -2020,7 +1997,7 @@ void Optimizer::checkSorts() // Walk trough the relations of the RSE and see if a // matching stream can be found. - if (newRse->rse_jointype == blr_inner) + if (newRse->isInnerJoin()) { if (newRse->rse_relations.getCount() == 1) node = newRse->rse_relations[0]; @@ -2393,7 +2370,7 @@ void Optimizer::formRivers(const StreamList& streams, bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) { - fb_assert(joinType != OUTER_JOIN); + fb_assert(joinType != JoinType::OUTER); ULONG selected_rivers[OPT_STREAM_BITS], selected_rivers2[OPT_STREAM_BITS]; ValueExprNode** eq_class; @@ -2580,7 +2557,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) // If any of to-be-hashed rivers is too large to be hashed efficiently, // then prefer a merge join instead of a hash join. - const bool useMergeJoin = hashOverflow; + const bool useMergeJoin = hashOverflow && (joinType == JoinType::INNER); // Build a join stream @@ -2588,7 +2565,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) RecordSource* finalRsb = nullptr; // MERGE JOIN does not support other join types yet - if (useMergeJoin && joinType == INNER_JOIN) + if (useMergeJoin && joinType == JoinType::INNER) { position = 0; for (const auto river : joinedRivers) @@ -2615,7 +2592,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) } else { - if (joinType == INNER_JOIN) + if (joinType == JoinType::INNER) { // Ensure that the largest river is placed at the first position. // It's important for a hash join to be efficient. diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index 9566b4b3882..a4d415ae4f9 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -316,7 +316,8 @@ class Optimizer : public Firebird::PermanentStorage iter->flags = 0; } - // Assignment is not currently used in the code and I doubt it should be + ConjunctIterator() = delete; + ConjunctIterator(const ConjunctIterator& other) = delete; ConjunctIterator& operator=(const ConjunctIterator& other) = delete; private: @@ -329,10 +330,6 @@ class Optimizer : public Firebird::PermanentStorage { rewind(); } - - ConjunctIterator(const ConjunctIterator& other) - : begin(other.begin), end(other.end), iter(other.iter) - {} }; ConjunctIterator getBaseConjuncts() @@ -441,6 +438,23 @@ class Optimizer : public Firebird::PermanentStorage selectivity = minSelectivity + diffSelectivity * factor; } + bool deliverJoinConjuncts(const BoolExprNodeStack& conjuncts) + { + fb_assert(conjuncts.hasData()); + + // Look at cardinality of the priorly joined streams. If it's known to be + // not very small, give up a possible nested loop join in favor of a hash join. + // Here we assume every equi-join condition having a default selectivity (0.1). + // TODO: replace with a proper cost-based decision in the future. + + double subSelectivity = MAXIMUM_SELECTIVITY; + for (auto count = conjuncts.getCount(); count; count--) + subSelectivity *= DEFAULT_SELECTIVITY; + const auto thresholdCardinality = MINIMUM_CARDINALITY / subSelectivity; + + return (cardinality && cardinality <= thresholdCardinality); + } + static RecordSource* compile(thread_db* tdbb, CompilerScratch* csb, RseNode* rse) { bool firstRows = false; @@ -455,7 +469,7 @@ class Optimizer : public Firebird::PermanentStorage firstRows = attachment->att_opt_first_rows.valueOr(defaultFirstRows); } - return Optimizer(tdbb, csb, rse, firstRows, 0).compile(nullptr); + return Optimizer(tdbb, csb, rse, firstRows).compile(nullptr); } ~Optimizer(); @@ -481,17 +495,27 @@ class Optimizer : public Firebird::PermanentStorage bool isInnerJoin() const { - return (rse->rse_jointype == blr_inner); + return rse->isInnerJoin(); + } + + bool isOuterJoin() const + { + return rse->isOuterJoin(); } bool isLeftJoin() const { - return (rse->rse_jointype == blr_left); + return rse->isLeftJoin(); } bool isFullJoin() const { - return (rse->rse_jointype == blr_full); + return rse->isFullJoin(); + } + + bool isSpecialJoin() const + { + return rse->isSpecialJoin(); } const StreamList& getOuterStreams() const @@ -519,11 +543,6 @@ class Optimizer : public Firebird::PermanentStorage return composeBoolean(iter, selectivity); } - bool isSemiJoined() const - { - return (rse->flags & RseNode::FLAG_SEMI_JOINED) != 0; - } - bool checkEquiJoin(BoolExprNode* boolean); bool getEquiJoinKeys(BoolExprNode* boolean, NestConst* node1, @@ -535,7 +554,7 @@ class Optimizer : public Firebird::PermanentStorage private: Optimizer(thread_db* aTdbb, CompilerScratch* aCsb, RseNode* aRse, - bool parentFirstRows, double parentCardinality); + bool parentFirstRows); RecordSource* compile(BoolExprNodeStack* parentStack); @@ -549,7 +568,7 @@ class Optimizer : public Firebird::PermanentStorage RiverList& rivers, SortNode** sortClause, const PlanNode* planClause); - bool generateEquiJoin(RiverList& rivers, JoinType joinType = INNER_JOIN); + bool generateEquiJoin(RiverList& rivers, JoinType joinType); void generateInnerJoin(StreamList& streams, RiverList& rivers, SortNode** sortClause, diff --git a/src/jrd/par.cpp b/src/jrd/par.cpp index a95a4202c3c..fe86a105507 100644 --- a/src/jrd/par.cpp +++ b/src/jrd/par.cpp @@ -1360,8 +1360,7 @@ RseNode* PAR_rse(thread_db* tdbb, CompilerScratch* csb, SSHORT rse_op) case blr_join_type: { - const USHORT jointype = (USHORT) csb->csb_blr_reader.getByte(); - rse->rse_jointype = jointype; + const auto jointype = csb->csb_blr_reader.getByte(); if (jointype != blr_inner && jointype != blr_left && jointype != blr_right && @@ -1369,6 +1368,7 @@ RseNode* PAR_rse(thread_db* tdbb, CompilerScratch* csb, SSHORT rse_op) { PAR_syntax_error(csb, "join type clause"); } + rse->rse_jointype = jointype; break; } @@ -1410,7 +1410,7 @@ RseNode* PAR_rse(thread_db* tdbb, CompilerScratch* csb, SSHORT rse_op) // An outer join is only allowed when the stream count is 2 // and a boolean expression has been supplied - if (rse->rse_jointype == blr_inner || + if (rse->isInnerJoin() || (rse->rse_relations.getCount() == 2 && rse->rse_boolean)) { // Convert right outer joins to left joins to avoid diff --git a/src/jrd/recsrc/FullOuterJoin.cpp b/src/jrd/recsrc/FullOuterJoin.cpp index ca62d6a5a52..8a828a97629 100644 --- a/src/jrd/recsrc/FullOuterJoin.cpp +++ b/src/jrd/recsrc/FullOuterJoin.cpp @@ -40,15 +40,16 @@ using namespace Jrd; FullOuterJoin::FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, const StreamList& checkStreams) - : RecordSource(csb), - m_arg1(arg1), - m_arg2(arg2), + : Join(csb, 2, JoinType::OUTER), m_checkStreams(csb->csb_pool, checkStreams) { - fb_assert(m_arg1 && m_arg2); + fb_assert(arg1 && arg2); m_impure = csb->allocImpure(); m_cardinality = arg1->getCardinality() + arg2->getCardinality(); + + m_args.add(arg1); + m_args.add(arg2); } void FullOuterJoin::internalOpen(thread_db* tdbb) const @@ -58,25 +59,22 @@ void FullOuterJoin::internalOpen(thread_db* tdbb) const impure->irsb_flags = irsb_open | irsb_first; - m_arg1->open(tdbb); + m_args[0]->open(tdbb); } void FullOuterJoin::close(thread_db* tdbb) const { - Request* const request = tdbb->getRequest(); + const auto request = tdbb->getRequest(); invalidateRecords(request); - Impure* const impure = request->getImpure(m_impure); + const auto impure = request->getImpure(m_impure); if (impure->irsb_flags & irsb_open) { impure->irsb_flags &= ~irsb_open; - if (impure->irsb_flags & irsb_first) - m_arg1->close(tdbb); - else - m_arg2->close(tdbb); + Join::close(tdbb); } } @@ -84,26 +82,29 @@ bool FullOuterJoin::internalGetRecord(thread_db* tdbb) const { JRD_reschedule(tdbb); - Request* const request = tdbb->getRequest(); - Impure* const impure = request->getImpure(m_impure); + const auto request = tdbb->getRequest(); + const auto impure = request->getImpure(m_impure); if (!(impure->irsb_flags & irsb_open)) return false; + const auto arg1 = m_args[0]; + const auto arg2 = m_args[1]; + if (impure->irsb_flags & irsb_first) { - if (m_arg1->getRecord(tdbb)) + if (arg1->getRecord(tdbb)) return true; impure->irsb_flags &= ~irsb_first; - m_arg1->close(tdbb); - m_arg2->open(tdbb); + arg1->close(tdbb); + arg2->open(tdbb); } // We should exclude matching records from the right-joined (second) record source, // as they're already returned from the left-joined (first) record source - while (m_arg2->getRecord(tdbb)) + while (arg2->getRecord(tdbb)) { bool matched = false; @@ -123,25 +124,11 @@ bool FullOuterJoin::internalGetRecord(thread_db* tdbb) const return false; } -bool FullOuterJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult FullOuterJoin::lockRecord(thread_db* tdbb) const -{ - SET_TDBB(tdbb); - - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void FullOuterJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { level++; plan += "JOIN ("; - m_arg1->getLegacyPlan(tdbb, plan, level); - plan += ", "; - m_arg2->getLegacyPlan(tdbb, plan, level); + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } @@ -152,39 +139,5 @@ void FullOuterJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsig planEntry.lines.add().text = "Full Outer Join"; printOptInfo(planEntry.lines); - if (recurse) - { - ++level; - m_arg1->getPlan(tdbb, planEntry.children.add(), level, recurse); - m_arg2->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void FullOuterJoin::markRecursive() -{ - m_arg1->markRecursive(); - m_arg2->markRecursive(); -} - -void FullOuterJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - m_arg1->findUsedStreams(streams, expandAll); - m_arg2->findUsedStreams(streams, expandAll); -} - -bool FullOuterJoin::isDependent(const StreamList& streams) const -{ - return m_arg1->isDependent(streams) || m_arg2->isDependent(streams); -} - -void FullOuterJoin::invalidateRecords(Request* request) const -{ - m_arg1->invalidateRecords(request); - m_arg2->invalidateRecords(request); -} - -void FullOuterJoin::nullRecords(thread_db* tdbb) const -{ - m_arg1->nullRecords(tdbb); - m_arg2->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index 3ba9941c8ca..a00a239ed5f 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -252,10 +252,8 @@ class HashJoin::HashTable : public PermanentStorage HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity) - : RecordSource(csb), - m_joinType(joinType), - m_boolean(nullptr), - m_args(csb->csb_pool, count - 1) + : Join(csb, count, joinType), + m_subs(csb->csb_pool, count - 1) { fb_assert(count >= 2); @@ -266,10 +264,8 @@ HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, BoolExprNode* boolean, RecordSource* const* args, NestValueArray* const* keys, double selectivity) - : RecordSource(csb), - m_joinType(OUTER_JOIN), - m_boolean(boolean), - m_args(csb->csb_pool, 1) + : Join(csb, 2, JoinType::OUTER, boolean), + m_subs(csb->csb_pool, 1) { init(tdbb, csb, 2, args, keys, selectivity); } @@ -287,6 +283,7 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, m_leader.totalKeyLength = 0; m_cardinality = m_leader.source->getCardinality(); + m_args.add(m_leader.source); for (FB_SIZE_T j = 0; j < leaderKeyCount; j++) { @@ -314,13 +311,13 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, for (FB_SIZE_T i = 1; i < count; i++) { - RecordSource* const sub_rsb = args[i]; - fb_assert(sub_rsb); + const auto subRsb = args[i]; + fb_assert(subRsb); - m_cardinality *= sub_rsb->getCardinality(); + m_cardinality *= subRsb->getCardinality(); SubStream sub; - sub.buffer = FB_NEW_POOL(csb->csb_pool) BufferedStream(csb, sub_rsb); + sub.buffer = FB_NEW_POOL(csb->csb_pool) BufferedStream(csb, subRsb); sub.keys = keys[i]; const FB_SIZE_T subKeyCount = sub.keys->getCount(); sub.keyLengths = FB_NEW_POOL(csb->csb_pool) ULONG[subKeyCount]; @@ -350,15 +347,12 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, sub.totalKeyLength += keyLength; } - m_args.add(sub); + m_subs.add(sub); + m_args.add(sub.buffer); } if (!selectivity) - { - selectivity = MAXIMUM_SELECTIVITY; - while (keyCount--) - selectivity *= REDUCE_SELECTIVITY_FACTOR_EQUALITY; - } + selectivity = pow(REDUCE_SELECTIVITY_FACTOR_EQUALITY, keyCount); m_cardinality *= selectivity; } @@ -390,16 +384,13 @@ void HashJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; + Join::close(tdbb); + delete impure->irsb_hash_table; impure->irsb_hash_table = nullptr; delete[] impure->irsb_leader_buffer; impure->irsb_leader_buffer = nullptr; - - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - m_args[i].buffer->close(tdbb); - - m_leader.source->close(tdbb); } } @@ -413,7 +404,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!(impure->irsb_flags & irsb_open)) return false; - const auto inner = m_args.front().source; + const auto inner = m_subs.front().source; while (true) { @@ -437,7 +428,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!impure->irsb_hash_table && !impure->irsb_leader_buffer) { auto& pool = *tdbb->getDefaultPool(); - const auto argCount = m_args.getCount(); + const auto argCount = m_subs.getCount(); impure->irsb_hash_table = FB_NEW_POOL(pool) HashTable(pool, argCount); impure->irsb_leader_buffer = FB_NEW_POOL(pool) UCHAR[m_leader.totalKeyLength]; @@ -449,14 +440,14 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const // Read and cache the inner streams. While doing that, // hash the join condition values and populate hash tables. - m_args[i].buffer->open(tdbb); + m_subs[i].buffer->open(tdbb); ULONG counter = 0; - const auto keyBuffer = buffer.getBuffer(m_args[i].totalKeyLength, false); + const auto keyBuffer = buffer.getBuffer(m_subs[i].totalKeyLength, false); - while (m_args[i].buffer->getRecord(tdbb)) + while (m_subs[i].buffer->getRecord(tdbb)) { - const auto hash = computeHash(tdbb, request, m_args[i], keyBuffer); + const auto hash = computeHash(tdbb, request, m_subs[i], keyBuffer); impure->irsb_hash_table->put(i, hash, counter++); } } @@ -474,10 +465,10 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!impure->irsb_hash_table->setup(impure->irsb_leader_hash)) { - if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + if (m_joinType == JoinType::INNER || m_joinType == JoinType::SEMI) continue; - if (m_joinType == OUTER_JOIN) + if (m_joinType == JoinType::OUTER) inner->nullRecords(tdbb); return true; @@ -493,7 +484,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const { bool found = true; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) + for (FB_SIZE_T i = 0; i < m_subs.getCount(); i++) { if (!fetchRecord(tdbb, impure, i)) { @@ -506,28 +497,28 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const { impure->irsb_flags |= irsb_mustread; - if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + if (m_joinType == JoinType::INNER || m_joinType == JoinType::SEMI) continue; - if (m_joinType == OUTER_JOIN) + if (m_joinType == JoinType::OUTER) inner->nullRecords(tdbb); break; } - if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::SEMI || m_joinType == JoinType::ANTI) { impure->irsb_flags |= irsb_mustread; - if (m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::ANTI) continue; } impure->irsb_flags &= ~irsb_first; } - else if (!fetchRecord(tdbb, impure, m_args.getCount() - 1)) + else if (!fetchRecord(tdbb, impure, m_subs.getCount() - 1)) { - fb_assert(m_joinType == INNER_JOIN); + fb_assert(m_joinType == JoinType::INNER); impure->irsb_flags |= irsb_mustread; continue; } @@ -538,29 +529,11 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const return true; } -bool HashJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult HashJoin::lockRecord(thread_db* /*tdbb*/) const -{ - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void HashJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { level++; plan += "HASH ("; - m_leader.source->getLegacyPlan(tdbb, plan, level); - plan += ", "; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - { - if (i) - plan += ", "; - - m_args[i].source->getLegacyPlan(tdbb, plan, level); - } + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } @@ -568,29 +541,7 @@ void HashJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned l { planEntry.className = "HashJoin"; - planEntry.lines.add().text = "Hash Join "; - - switch (m_joinType) - { - case INNER_JOIN: - planEntry.lines.back().text += "(inner)"; - break; - - case OUTER_JOIN: - planEntry.lines.back().text += "(outer)"; - break; - - case SEMI_JOIN: - planEntry.lines.back().text += "(semi)"; - break; - - case ANTI_JOIN: - planEntry.lines.back().text += "(anti)"; - break; - - default: - fb_assert(false); - } + planEntry.lines.add().text = "Hash Join " + printType(); string extras; extras.printf(" (keys: %" ULONGFORMAT", total key length: %" ULONGFORMAT")", @@ -600,61 +551,7 @@ void HashJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned l printOptInfo(planEntry.lines); - if (recurse) - { - ++level; - - m_leader.source->getPlan(tdbb, planEntry.children.add(), level, recurse); - - for (const auto& arg : m_args) - arg.source->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void HashJoin::markRecursive() -{ - m_leader.source->markRecursive(); - - for (const auto& arg : m_args) - arg.source->markRecursive(); -} - -void HashJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - m_leader.source->findUsedStreams(streams, expandAll); - - for (const auto& arg : m_args) - arg.source->findUsedStreams(streams, expandAll); -} - -bool HashJoin::isDependent(const StreamList& streams) const -{ - if (m_leader.source->isDependent(streams)) - return true; - - for (const auto& arg : m_args) - { - if (arg.source->isDependent(streams)) - return true; - } - - return (m_boolean && m_boolean->containsAnyStream(streams)); -} - -void HashJoin::invalidateRecords(Request* request) const -{ - m_leader.source->invalidateRecords(request); - - for (const auto& arg : m_args) - arg.source->invalidateRecords(request); -} - -void HashJoin::nullRecords(thread_db* tdbb) const -{ - m_leader.source->nullRecords(tdbb); - - for (const auto& arg : m_args) - arg.source->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } ULONG HashJoin::computeHash(thread_db* tdbb, @@ -740,7 +637,7 @@ bool HashJoin::fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) co { HashTable* const hashTable = impure->irsb_hash_table; - const BufferedStream* const arg = m_args[stream].buffer; + const BufferedStream* const arg = m_subs[stream].buffer; ULONG position; if (hashTable->iterate(stream, impure->irsb_leader_hash, position)) @@ -751,7 +648,7 @@ bool HashJoin::fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) co return true; } - if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::SEMI || m_joinType == JoinType::ANTI) return false; while (true) diff --git a/src/jrd/recsrc/MergeJoin.cpp b/src/jrd/recsrc/MergeJoin.cpp index de3bf491a6d..bc65abf0c60 100644 --- a/src/jrd/recsrc/MergeJoin.cpp +++ b/src/jrd/recsrc/MergeJoin.cpp @@ -38,32 +38,21 @@ static const char* const SCRATCH = "fb_merge_"; MergeJoin::MergeJoin(CompilerScratch* csb, FB_SIZE_T count, SortedStream* const* args, const NestValueArray* const* keys) - : RecordSource(csb), - m_args(csb->csb_pool), - m_keys(csb->csb_pool) + : Join(csb, count, JoinType::INNER), + m_keys(csb->csb_pool, count) { const size_t size = sizeof(struct Impure) + count * sizeof(Impure::irsb_mrg_repeat); m_impure = csb->allocImpure(FB_ALIGNMENT, static_cast(size)); m_cardinality = MINIMUM_CARDINALITY; - m_args.resize(count); - m_keys.resize(count); - for (FB_SIZE_T i = 0; i < count; i++) { - fb_assert(args[i]); - m_args[i] = args[i]; - - m_cardinality *= args[i]->getCardinality(); - if (i) - { - for (auto keyCount = keys[i]->getCount(); keyCount; keyCount--) - m_cardinality *= REDUCE_SELECTIVITY_FACTOR_EQUALITY; - } - - fb_assert(keys[i]); - m_keys[i] = keys[i]; + m_args.add(args[i]); + m_cardinality *= args[i]->getCardinality() * + pow(REDUCE_SELECTIVITY_FACTOR_EQUALITY, keys[i]->getCount()); } + + m_keys.add(keys, count); } void MergeJoin::internalOpen(thread_db* tdbb) const @@ -114,14 +103,12 @@ void MergeJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; + Join::close(tdbb); + for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) { Impure::irsb_mrg_repeat* const tail = &impure->irsb_mrg_rpt[i]; - // close all the substreams for the sort-merge - - m_args[i]->close(tdbb); - // Release memory associated with the merge file block and the sort file block. // Also delete the merge file if one exists. @@ -335,27 +322,11 @@ bool MergeJoin::internalGetRecord(thread_db* tdbb) const return true; } -bool MergeJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult MergeJoin::lockRecord(thread_db* /*tdbb*/) const -{ - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void MergeJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { level++; plan += "MERGE ("; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - { - if (i) - plan += ", "; - - m_args[i]->getLegacyPlan(tdbb, plan, level); - } + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } @@ -363,55 +334,17 @@ void MergeJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned { planEntry.className = "MergeJoin"; + planEntry.lines.add().text = "Merge Join " + printType(); + string extras; extras.printf(" (keys: %" ULONGFORMAT", total key length: %" ULONGFORMAT")", m_keys[0]->getCount(), m_args[0]->getKeyLength()); - planEntry.lines.add().text = "Merge Join (inner)" + extras; - printOptInfo(planEntry.lines); + planEntry.lines.back().text += extras; - if (recurse) - { - ++level; - - for (const auto arg : m_args) - arg->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void MergeJoin::markRecursive() -{ - for (auto arg : m_args) - arg->markRecursive(); -} - -void MergeJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - for (const auto arg : m_args) - arg->findUsedStreams(streams, expandAll); -} - -bool MergeJoin::isDependent(const StreamList& streams) const -{ - for (const auto arg : m_args) - { - if (arg->isDependent(streams)) - return true; - } - - return false; -} - -void MergeJoin::invalidateRecords(Request* request) const -{ - for (const auto arg : m_args) - arg->invalidateRecords(request); -} + printOptInfo(planEntry.lines); -void MergeJoin::nullRecords(thread_db* tdbb) const -{ - for (const auto arg : m_args) - arg->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } int MergeJoin::compare(thread_db* tdbb, const NestValueArray* node1, diff --git a/src/jrd/recsrc/NestedLoopJoin.cpp b/src/jrd/recsrc/NestedLoopJoin.cpp index 484cf54d0eb..cd6567fab92 100644 --- a/src/jrd/recsrc/NestedLoopJoin.cpp +++ b/src/jrd/recsrc/NestedLoopJoin.cpp @@ -35,14 +35,9 @@ using namespace Jrd; // Data access: nested loops join // ------------------------------ -NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, - FB_SIZE_T count, - RecordSource* const* args, - JoinType joinType) - : RecordSource(csb), - m_joinType(joinType), - m_boolean(nullptr), - m_args(csb->csb_pool, count) +NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, JoinType joinType, + FB_SIZE_T count, RecordSource* const* args) + : Join(csb, count, joinType) { m_impure = csb->allocImpure(); m_cardinality = MINIMUM_CARDINALITY; @@ -57,19 +52,15 @@ NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, BoolExprNode* boolean) - : RecordSource(csb), - m_joinType(OUTER_JOIN), - m_boolean(boolean), - m_args(csb->csb_pool, 2) + : Join(csb, 2, JoinType::OUTER, boolean) { fb_assert(outer && inner); m_impure = csb->allocImpure(); + m_cardinality = outer->getCardinality() * inner->getCardinality(); m_args.add(outer); m_args.add(inner); - - m_cardinality = outer->getCardinality() * inner->getCardinality(); } void NestedLoopJoin::internalOpen(thread_db* tdbb) const @@ -92,8 +83,7 @@ void NestedLoopJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; - for (const auto arg : m_args) - arg->close(tdbb); + Join::close(tdbb); } } @@ -107,7 +97,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const if (!(impure->irsb_flags & irsb_open)) return false; - if (m_joinType == INNER_JOIN) + if (m_joinType == JoinType::INNER) { if (impure->irsb_flags & irsb_first) { @@ -129,7 +119,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const else if (!fetchRecord(tdbb, m_args.getCount() - 1)) return false; } - else if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + else if (m_joinType == JoinType::SEMI || m_joinType == JoinType::ANTI) { const auto outer = m_args[0]; @@ -161,7 +151,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const if (m_args[i]->getRecord(tdbb)) { - if (m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::ANTI) { stopArg = i; break; @@ -169,7 +159,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const } else { - if (m_joinType == SEMI_JOIN) + if (m_joinType == JoinType::SEMI) { stopArg = i; break; @@ -188,7 +178,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const } else { - fb_assert(m_joinType == OUTER_JOIN); + fb_assert(m_joinType == JoinType::OUTER); fb_assert(m_args.getCount() == 2); const auto outer = m_args[0]; @@ -241,29 +231,13 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const return true; } -bool NestedLoopJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult NestedLoopJoin::lockRecord(thread_db* /*tdbb*/) const -{ - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void NestedLoopJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { if (m_args.hasData()) { level++; plan += "JOIN ("; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - { - if (i) - plan += ", "; - - m_args[i]->getLegacyPlan(tdbb, plan, level); - } + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } } @@ -272,79 +246,15 @@ void NestedLoopJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsi { planEntry.className = "NestedLoopJoin"; - planEntry.lines.add().text = "Nested Loop Join "; - - switch (m_joinType) - { - case INNER_JOIN: - planEntry.lines.back().text += "(inner)"; - break; - - case OUTER_JOIN: - planEntry.lines.back().text += "(outer)"; - break; - - case SEMI_JOIN: - planEntry.lines.back().text += "(semi)"; - break; - - case ANTI_JOIN: - planEntry.lines.back().text += "(anti)"; - break; - - default: - fb_assert(false); - } - + planEntry.lines.add().text = "Nested Loop Join " + printType(); printOptInfo(planEntry.lines); - if (recurse) - { - ++level; - - for (const auto arg : m_args) - arg->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void NestedLoopJoin::markRecursive() -{ - for (auto arg : m_args) - arg->markRecursive(); -} - -void NestedLoopJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - for (const auto arg : m_args) - arg->findUsedStreams(streams, expandAll); -} - -bool NestedLoopJoin::isDependent(const StreamList& streams) const -{ - for (const auto arg : m_args) - { - if (arg->isDependent(streams)) - return true; - } - - return (m_boolean && m_boolean->containsAnyStream(streams)); -} - -void NestedLoopJoin::invalidateRecords(Request* request) const -{ - for (const auto arg : m_args) - arg->invalidateRecords(request); -} - -void NestedLoopJoin::nullRecords(thread_db* tdbb) const -{ - for (const auto arg : m_args) - arg->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } bool NestedLoopJoin::fetchRecord(thread_db* tdbb, FB_SIZE_T n) const { - fb_assert(m_joinType == INNER_JOIN); + fb_assert(m_joinType == JoinType::INNER); const RecordSource* const arg = m_args[n]; diff --git a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index b5414a8b00c..e1b8994f8a4 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -53,7 +53,7 @@ namespace Jrd class BufferedStream; class PlanEntry; - enum JoinType { INNER_JOIN, OUTER_JOIN, SEMI_JOIN, ANTI_JOIN }; + enum class JoinType { INNER, OUTER, SEMI, ANTI }; // Common base for record sources, sub-queries and cursors. class AccessPath @@ -1153,27 +1153,129 @@ namespace Jrd // Multiplexing (many -> one) access methods - class NestedLoopJoin : public RecordSource + template + class Join : public RecordSource { public: - NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args, - JoinType joinType = INNER_JOIN); - NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, - BoolExprNode* boolean); + Join(CompilerScratch* csb, FB_SIZE_T count, JoinType joinType, BoolExprNode* boolean = nullptr) + : RecordSource(csb), m_joinType(joinType), m_boolean(boolean), + m_args(csb->csb_pool, count) + { + fb_assert(!m_boolean || m_joinType == JoinType::OUTER); + } - void close(thread_db* tdbb) const override; + virtual void close(thread_db* tdbb) const + { + for (const auto& arg : m_args) + arg->close(tdbb); + } - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; + bool refetchRecord(thread_db* /*tdbb*/) const override + { + return true; + } - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; + WriteLockResult lockRecord(thread_db* /*tdbb*/) const override + { + Firebird::status_exception::raise(Firebird::Arg::Gds(isc_record_lock_not_supp)); + } - void markRecursive() override; - void invalidateRecords(Request* request) const override; + void markRecursive() override + { + for (auto& arg : m_args) + arg->markRecursive(); + } - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; + void findUsedStreams(StreamList& streams, bool expandAll) const override + { + for (const auto& arg : m_args) + arg->findUsedStreams(streams, expandAll); + } + + bool isDependent(const StreamList& streams) const override + { + for (const auto& arg : m_args) + { + if (arg->isDependent(streams)) + return true; + } + + return (m_boolean && m_boolean->containsAnyStream(streams)); + } + + void invalidateRecords(Request* request) const override + { + for (const auto& arg : m_args) + arg->invalidateRecords(request); + } + + void nullRecords(thread_db* tdbb) const override + { + for (const auto& arg : m_args) + arg->nullRecords(tdbb); + } + + protected: + const JoinType m_joinType; + const NestConst m_boolean; + Firebird::Array > m_args; + + void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const + { + for (const auto& arg : m_args) + { + if (arg != m_args.front()) + plan += ", "; + + arg->getLegacyPlan(tdbb, plan, level); + } + } + + void getPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const + { + if (recurse) + { + ++level; + + for (const auto& arg : m_args) + arg->getPlan(tdbb, planEntry.children.add(), level, recurse); + } + } + + const Firebird::string printType() const + { + switch (m_joinType) + { + case JoinType::INNER: + return "(inner)"; + + case JoinType::OUTER: + return "(outer)"; + + case JoinType::SEMI: + return "(semi)"; + + case JoinType::ANTI: + return "(anti)"; + + default: + fb_assert(false); + } + + return ""; + } + }; + + class NestedLoopJoin : public Join + { + public: + NestedLoopJoin(CompilerScratch* csb, JoinType joinType, + FB_SIZE_T count, RecordSource* const* args); + NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, + BoolExprNode* boolean); + + void close(thread_db* tdbb) const override; + void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; protected: void internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const override; @@ -1182,45 +1284,27 @@ namespace Jrd private: bool fetchRecord(thread_db*, FB_SIZE_T) const; - - const JoinType m_joinType; - const NestConst m_boolean; - - Firebird::Array > m_args; }; - class FullOuterJoin : public RecordSource + class FullOuterJoin : public Join { public: FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, const StreamList& checkStreams); void close(thread_db* tdbb) const override; - - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - protected: void internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const override; void internalOpen(thread_db* tdbb) const override; bool internalGetRecord(thread_db* tdbb) const override; private: - NestConst m_arg1; - NestConst m_arg2; const StreamList m_checkStreams; }; - class HashJoin : public RecordSource + class HashJoin : public Join { class HashTable; @@ -1254,19 +1338,8 @@ namespace Jrd double selectivity = 0); void close(thread_db* tdbb) const override; - - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - static unsigned maxCapacity(); protected: @@ -1282,14 +1355,11 @@ namespace Jrd const SubStream& sub, UCHAR* buffer) const; bool fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) const; - const JoinType m_joinType; - const NestConst m_boolean; - SubStream m_leader; - Firebird::Array m_args; + Firebird::Array m_subs; }; - class MergeJoin : public RecordSource + class MergeJoin : public Join { struct MergeFile { @@ -1325,19 +1395,8 @@ namespace Jrd const NestValueArray* const* keys); void close(thread_db* tdbb) const override; - - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - protected: void internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const override; void internalOpen(thread_db* tdbb) const override; @@ -1350,7 +1409,6 @@ namespace Jrd SLONG getRecordByIndex(thread_db* tdbb, FB_SIZE_T index) const; bool fetchRecord(thread_db* tdbb, FB_SIZE_T index) const; - Firebird::Array > m_args; Firebird::Array m_keys; };