From d28200417c09fccf106d41d2efccf8a23f24a276 Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Mon, 7 Apr 2025 07:57:12 +0300 Subject: [PATCH 1/5] Refactor join types in RSE and RecordSource. Stricter processing of special joins. The optimizer is mostly ready for anti-joins. --- src/jrd/RecordSourceNodes.cpp | 123 ++++++++++++------------- src/jrd/RecordSourceNodes.h | 58 ++++++++++-- src/jrd/optimizer/InnerJoin.cpp | 6 +- src/jrd/optimizer/Optimizer.cpp | 146 +++++++++++++++--------------- src/jrd/optimizer/Optimizer.h | 30 +++--- src/jrd/par.cpp | 6 +- src/jrd/recsrc/FullOuterJoin.cpp | 2 +- src/jrd/recsrc/HashJoin.cpp | 46 +++------- src/jrd/recsrc/MergeJoin.cpp | 2 +- src/jrd/recsrc/NestedLoopJoin.cpp | 49 +++------- src/jrd/recsrc/RecordSource.h | 51 +++++++++-- 11 files changed, 264 insertions(+), 255 deletions(-) diff --git a/src/jrd/RecordSourceNodes.cpp b/src/jrd/RecordSourceNodes.cpp index a0950d2a3a2..fea341d9775 100644 --- a/src/jrd/RecordSourceNodes.cpp +++ b/src/jrd/RecordSourceNodes.cpp @@ -55,6 +55,15 @@ static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const namespace { + struct SpecialJoinItem + { + RseNode* rse; + bool semiJoin; + BoolExprNode* boolean; + }; + + typedef HalfStaticArray SpecialJoinList; + // Search through the list of ANDed booleans to find comparisons // referring streams of parent select expressions. // Extract those booleans and return them to the caller. 
@@ -112,8 +121,7 @@ namespace bool findPossibleJoins(CompilerScratch* csb, const StreamList& rseStreams, BoolExprNode** parentBoolean, - RecordSourceNodeStack& rseStack, - BoolExprNodeStack& booleanStack) + SpecialJoinList& result) { auto boolNode = *parentBoolean; @@ -121,9 +129,9 @@ namespace if (binaryNode && binaryNode->blrOp == blr_and) { const bool found1 = findPossibleJoins(csb, rseStreams, - binaryNode->arg1.getAddress(), rseStack, booleanStack); + binaryNode->arg1.getAddress(), result); const bool found2 = findPossibleJoins(csb, rseStreams, - binaryNode->arg2.getAddress(), rseStack, booleanStack); + binaryNode->arg2.getAddress(), result); if (!binaryNode->arg1 && !binaryNode->arg2) *parentBoolean = nullptr; @@ -142,7 +150,7 @@ namespace auto rse = rseNode->rse; fb_assert(rse && (rse->flags & RseNode::FLAG_SUB_QUERY)); - if (rse->rse_boolean && rse->rse_jointype == blr_inner && + if (rse->rse_boolean && rse->isInnerJoin() && !rse->rse_first && !rse->rse_skip && !rse->rse_plan) { // Find booleans convertable into semi-joins @@ -187,9 +195,7 @@ namespace if (!dependent) { rse->flags &= ~RseNode::FLAG_SUB_QUERY; - rse->flags |= RseNode::FLAG_SEMI_JOINED; - rseStack.push(rse); - booleanStack.push(boolean); + result.push({rse, true, boolean}); *parentBoolean = nullptr; return true; } @@ -993,7 +999,7 @@ void RelationSourceNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseN // 1) If the view has a projection, sort, first/skip or explicit plan. // 2) If it's part of an outer join. - if (rse->rse_jointype != blr_inner || // viewRse->rse_jointype != blr_inner || ??? + if (!rse->isInnerJoin() || // !viewRse->isInnerJoin() || ??? 
viewRse->rse_sorted || viewRse->rse_projection || viewRse->rse_first || viewRse->rse_skip || viewRse->rse_plan) { @@ -2913,19 +2919,19 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) switch (rse_jointype) { - case blr_inner: + case INNER_JOIN: streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]); break; - case blr_left: + case LEFT_JOIN: streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); ++dsqlScratch->inOuterJoin; streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]); --dsqlScratch->inOuterJoin; break; - case blr_right: + case RIGHT_JOIN: ++dsqlScratch->inOuterJoin; streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); --dsqlScratch->inOuterJoin; @@ -2936,7 +2942,7 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) streamList->items[1] = doDsqlPass(dsqlScratch, fromList->items[1]); break; - case blr_full: + case FULL_JOIN: ++dsqlScratch->inOuterJoin; streamList->items[0] = doDsqlPass(dsqlScratch, fromList->items[0]); // Temporarily remove just created context(s) from the stack, @@ -3008,7 +3014,7 @@ RseNode* RseNode::dsqlPass(DsqlCompilerScratch* dsqlScratch) if (matched->items.isEmpty()) { // There is no match. Transform to CROSS JOIN. - node->rse_jointype = blr_inner; + node->rse_jointype = INNER_JOIN; usingList = NULL; delete matched; @@ -3223,14 +3229,14 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb) ValueExprNode* skip = rse_skip; PlanNode* plan = rse_plan; - if (rse_jointype == blr_inner) + if (isInnerJoin()) csb->csb_inner_booleans.push(rse_boolean); // zip thru RseNode expanding views and inner joins for (auto sub : rse_relations) processSource(tdbb, csb, this, sub, &boolean, stack); - if (rse_jointype == blr_inner) + if (isInnerJoin()) csb->csb_inner_booleans.pop(); // Now, rebuild the RseNode block. 
@@ -3305,7 +3311,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, return; } - if (rse_jointype != blr_inner) + if (isOuterJoin()) { // Check whether any of the upper level booleans (those belonging to the WHERE clause) // is able to filter out rows from the "inner" streams. If this is the case, @@ -3320,7 +3326,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, StreamList streams; // First check the left stream of the full outer join - if (rse_jointype == blr_full) + if (isFullJoin()) { rse1->computeRseStreams(streams); @@ -3328,7 +3334,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, { if (boolean && boolean->ignoreNulls(streams)) { - rse_jointype = blr_left; + rse_jointype = LEFT_JOIN; break; } } @@ -3342,16 +3348,16 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, { if (boolean && boolean->ignoreNulls(streams)) { - if (rse_jointype == blr_full) + if (isFullJoin()) { // We should transform FULL join to RIGHT join, // but as we don't allow them inside the engine // just swap the sides and insist it's LEFT join std::swap(rse_relations[0], rse_relations[1]); - rse_jointype = blr_left; + rse_jointype = LEFT_JOIN; } else - rse_jointype = blr_inner; + rse_jointype = INNER_JOIN; break; } @@ -3366,11 +3372,9 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, // where we are just trying to inner join more than 2 streams. If possible, // try to flatten the tree out before we go any further. 
- if (!isLateral() && !isSemiJoined() && - rse->rse_jointype == blr_inner && - rse_jointype == blr_inner && - !rse_sorted && !rse_projection && - !rse_first && !rse_skip && !rse_plan) + if (!isLateral() && + rse->isInnerJoin() && isInnerJoin() && + !rse_sorted && !rse_projection && !rse_first && !rse_skip && !rse_plan) { for (auto sub : rse_relations) processSource(tdbb, csb, rse, sub, boolean, stack); @@ -3461,8 +3465,9 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr computeRseStreams(rseStreams); BoolExprNodeStack conjunctStack; + StreamStateHolder stateHolder(csb, opt->getOuterStreams()); - // pass RseNode boolean only to inner substreams because join condition + // Pass RseNode boolean only to inner substreams because join condition // should never exclude records from outer substreams if (opt->isInnerJoin() || (opt->isLeftJoin() && innerSubStream)) { @@ -3472,39 +3477,24 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr // // dimitr: the same for lateral derived tables in inner joins - StreamStateHolder stateHolder(csb, opt->getOuterStreams()); - - if (opt->isLeftJoin() || isLateral() || isSemiJoined()) - { + if (opt->isLeftJoin() || isLateral()) stateHolder.activate(); - if (opt->isLeftJoin() || isSemiJoined()) - { - // Push all conjuncts except "missing" ones (e.g. IS NULL) - for (auto iter = opt->getConjuncts(false, true); iter.hasData(); ++iter) - { - if (iter->containsAnyStream(rseStreams)) - conjunctStack.push(iter); - } - } - } - else + // For the LEFT JOIN, push all conjuncts except "missing" ones (e.g. 
IS NULL) + for (auto iter = opt->getConjuncts(false, opt->isLeftJoin()); iter.hasData(); ++iter) { - for (auto iter = opt->getConjuncts(); iter.hasData(); ++iter) - { - if (iter->containsAnyStream(rseStreams)) - conjunctStack.push(iter); - } + if (iter->containsAnyStream(rseStreams)) + conjunctStack.push(iter); } - - return opt->compile(this, &conjunctStack); } - - // Push only parent conjuncts to the outer stream - for (auto iter = opt->getConjuncts(true, false); iter.hasData(); ++iter) + else { - if (iter->containsAnyStream(rseStreams)) - conjunctStack.push(iter); + // Push only parent conjuncts to the outer stream + for (auto iter = opt->getConjuncts(true, false); iter.hasData(); ++iter) + { + if (iter->containsAnyStream(rseStreams)) + conjunctStack.push(iter); + } } return opt->compile(this, &conjunctStack); @@ -3512,7 +3502,7 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) { - if (rse_jointype != blr_inner || !rse_boolean || rse_plan) + if (!isInnerJoin() || !rse_boolean || rse_plan) return nullptr; // If the sub-query is nested inside the other sub-query which wasn't converted into semi-join, @@ -3532,19 +3522,16 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) } } - RecordSourceNodeStack rseStack; - BoolExprNodeStack booleanStack; - // Find possibly joinable sub-queries StreamList rseStreams; computeRseStreams(rseStreams); + SpecialJoinList specialJoins; - if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), rseStack, booleanStack)) + if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), specialJoins)) return nullptr; - fb_assert(rseStack.hasData() && booleanStack.hasData()); - fb_assert(rseStack.getCount() == booleanStack.getCount()); + fb_assert(specialJoins.hasData()); // Create joins between the original node and detected joinable nodes. 
// Preserve FIRST/SKIP nodes at their original position, i.e. outside semi-joins. @@ -3559,16 +3546,18 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) flags = 0; auto rse = this; - while (rseStack.hasData()) + while (specialJoins.hasData()) { const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool()) RseNode(*tdbb->getDefaultPool()); + const auto item = specialJoins.pop(); + newRse->rse_relations.add(rse); - newRse->rse_relations.add(rseStack.pop()); + newRse->rse_relations.add(item.rse); - newRse->rse_jointype = blr_inner; - newRse->rse_boolean = booleanStack.pop(); + newRse->rse_jointype = item.semiJoin ? SEMI_JOIN : ANTI_JOIN; + newRse->rse_boolean = item.boolean; rse = newRse; } @@ -3579,7 +3568,7 @@ RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) RseNode(*tdbb->getDefaultPool()); newRse->rse_relations.add(rse); - newRse->rse_jointype = blr_inner; + newRse->rse_jointype = INNER_JOIN; newRse->rse_first = first; newRse->rse_skip = skip; diff --git a/src/jrd/RecordSourceNodes.h b/src/jrd/RecordSourceNodes.h index cf3fad39fbf..25e69d4b9ab 100644 --- a/src/jrd/RecordSourceNodes.h +++ b/src/jrd/RecordSourceNodes.h @@ -715,6 +715,21 @@ class WindowSourceNode final : public TypedNode { + enum : UCHAR // storage is BLR-compatible + { + INNER_JOIN = 0, + LEFT_JOIN = 1, + RIGHT_JOIN = 2, + FULL_JOIN = 3, + SEMI_JOIN = 4, + ANTI_JOIN = 5 + }; + + static_assert(INNER_JOIN == blr_inner, "join type mismatch"); + static_assert(LEFT_JOIN == blr_left, "join type mismatch"); + static_assert(RIGHT_JOIN == blr_right, "join type mismatch"); + static_assert(FULL_JOIN == blr_full, "join type mismatch"); + public: enum : USHORT { @@ -725,10 +740,40 @@ class RseNode final : public TypedNode rse_invariants; // Invariant nodes bound to top-level RSE Firebird::Array > rse_relations; USHORT flags = 0; - USHORT rse_jointype = blr_inner; // inner, left, full - Firebird::TriState firstRows; // optimize for first rows + UCHAR 
rse_jointype = INNER_JOIN; + Firebird::TriState firstRows; // optimize for first rows }; class SelectExprNode final : public TypedNode diff --git a/src/jrd/optimizer/InnerJoin.cpp b/src/jrd/optimizer/InnerJoin.cpp index a6b1a364f2e..c356fa5e898 100644 --- a/src/jrd/optimizer/InnerJoin.cpp +++ b/src/jrd/optimizer/InnerJoin.cpp @@ -535,7 +535,7 @@ River* InnerJoin::formRiver() // Create a nested loop join from the priorly processed streams const auto priorRsb = (rsbs.getCount() == 1) ? rsbs[0] : - FB_NEW_POOL(getPool()) NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin()); + FB_NEW_POOL(getPool()) NestedLoopJoin(csb, JoinType::INNER, rsbs.getCount(), rsbs.begin()); // Prepare record sources and corresponding equivalence keys for hash-joining RecordSource* hashJoinRsbs[] = {priorRsb, rsb}; @@ -580,7 +580,7 @@ River* InnerJoin::formRiver() // Create a hash join rsb = FB_NEW_POOL(getPool()) - HashJoin(tdbb, csb, INNER_JOIN, 2, hashJoinRsbs, keys.begin(), stream.selectivity); + HashJoin(tdbb, csb, JoinType::INNER, 2, hashJoinRsbs, keys.begin(), stream.selectivity); // Clear priorly processed rsb's, as they're already incorporated into a hash join rsbs.clear(); @@ -597,7 +597,7 @@ River* InnerJoin::formRiver() // Create a nested loop join from the processed streams rsb = (rsbs.getCount() == 1) ? 
rsbs[0] : - FB_NEW_POOL(getPool()) NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin()); + FB_NEW_POOL(getPool()) NestedLoopJoin(csb, JoinType::INNER, rsbs.getCount(), rsbs.begin()); // Ensure matching booleans are rechecked early if (equiMatches.hasData()) diff --git a/src/jrd/optimizer/Optimizer.cpp b/src/jrd/optimizer/Optimizer.cpp index e31767d72d3..9957596e2b7 100644 --- a/src/jrd/optimizer/Optimizer.cpp +++ b/src/jrd/optimizer/Optimizer.cpp @@ -171,7 +171,7 @@ namespace CrossJoin(Optimizer* opt, RiverList& rivers, JoinType joinType) : River(opt->getCompilerScratch(), nullptr, rivers) { - fb_assert(joinType != OUTER_JOIN); + fb_assert(joinType != JoinType::OUTER); const auto csb = opt->getCompilerScratch(); Optimizer::ConjunctIterator iter(opt->getBaseConjuncts()); @@ -196,7 +196,7 @@ namespace { HalfStaticArray rsbs(riverCount); - if (joinType == INNER_JOIN) + if (joinType == JoinType::INNER) { // Reorder input rivers according to their possible inter-dependencies @@ -260,7 +260,7 @@ namespace } m_rsb = FB_NEW_POOL(csb->csb_pool) - NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin(), joinType); + NestedLoopJoin(csb, joinType, rsbs.getCount(), rsbs.begin()); } } }; @@ -710,10 +710,8 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) if (rse->rse_skip || rse->rse_first) parentStack = nullptr; - if (isSemiJoined()) + if (isSpecialJoin() && parentStack->hasData()) { - fb_assert(parentStack->hasData()); - // We have a semi-join, look at the parent (priorly joined streams) cardinality. // If it's known to be not very small, nullify the parent conjuncts // to give up a possible nested loop join in favor of a hash join. 
@@ -758,7 +756,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) { const auto node = iter.object(); - if (!isInnerJoin() && node->possiblyUnknown()) + if (isOuterJoin() && node->possiblyUnknown()) { // parent missing conjunctions shouldn't be // distributed to FULL OUTER JOIN streams at all @@ -870,20 +868,19 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // record source blocks for all streams RiverList rivers, dependentRivers; - HalfStaticArray specialSubQueries; + RseNode* specialRse = nullptr; bool innerSubStream = false; for (auto node : rse->rse_relations) { fb_assert(sort == rse->rse_sorted); fb_assert(aggregate == rse->rse_aggregate); + fb_assert(!specialRse); - const auto subRse = nodeAs(node); - - if (subRse && subRse->isSemiJoined()) + if (isSpecialJoin() && innerSubStream) { - fb_assert(rse->rse_jointype == blr_inner); - specialSubQueries.add(subRse); + specialRse = nodeAs(node); + fb_assert(specialRse); continue; } @@ -915,7 +912,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // Apply local booleans, if any. Note that it's done // only for inner joins and outer streams of left joins. 
- auto iter = getConjuncts(!isInnerJoin(), false); + auto iter = getConjuncts(isLeftJoin(), false); rsb = applyLocalBoolean(rsb, localStreams, iter); } @@ -974,15 +971,14 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) } // Outer joins are processed their own way - if (!isInnerJoin()) + if (rse->isOuterJoin()) { rivers.join(dependentRivers); + dependentRivers.clear(); rsb = OuterJoin(tdbb, this, rse, rivers, &sort).generate(); } else { - JoinType joinType = INNER_JOIN; - // AB: If previous rsb's are already on the stack we can't use // a navigational-retrieval for an ORDER BY because the next // streams are JOINed to the previous ones @@ -993,57 +989,60 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // AB: We could already have multiple rivers at this // point so try to do some hashing or sort/merging now. - while (generateEquiJoin(rivers, joinType)) + while (generateEquiJoin(rivers, JoinType::INNER)) ; } StreamList joinStreams(compileStreams); - fb_assert(joinStreams.getCount() != 1 || csb->csb_rpt[joinStreams[0]].csb_relation); - - while (true) + if (isInnerJoin()) { - // AB: Determine which streams have an index relationship - // with the currently active rivers. This is needed so that - // no merge is made between a new cross river and the - // currently active rivers. Where in the new cross river - // a stream depends (index) on the active rivers. - StreamList dependentStreams, freeStreams; - findDependentStreams(joinStreams, dependentStreams, freeStreams); + fb_assert(joinStreams.getCount() != 1 || csb->csb_rpt[joinStreams[0]].csb_relation); - // If we have dependent and free streams then we can't rely on - // the sort node to be used for index navigation - if (dependentStreams.hasData() && freeStreams.hasData()) + while (true) { - sort = nullptr; - sortCanBeUsed = false; - } + // AB: Determine which streams have an index relationship + // with the currently active rivers. 
This is needed so that + // no merge is made between a new cross river and the + // currently active rivers. Where in the new cross river + // a stream depends (index) on the active rivers. + StreamList dependentStreams, freeStreams; + findDependentStreams(joinStreams, dependentStreams, freeStreams); + + // If we have dependent and free streams then we can't rely on + // the sort node to be used for index navigation + if (dependentStreams.hasData() && freeStreams.hasData()) + { + sort = nullptr; + sortCanBeUsed = false; + } - if (dependentStreams.hasData()) - { - // Copy free streams - joinStreams.assign(freeStreams); + if (dependentStreams.hasData()) + { + // Copy free streams + joinStreams.assign(freeStreams); - // Make rivers from the dependent streams - generateInnerJoin(dependentStreams, rivers, &sort, rse->rse_plan); + // Make rivers from the dependent streams + generateInnerJoin(dependentStreams, rivers, &sort, rse->rse_plan); - // Generate one river which holds a cross join rsb between - // all currently available rivers + // Generate one river which holds a cross join rsb between + // all currently available rivers - rivers.add(FB_NEW_POOL(getPool()) CrossJoin(this, rivers, joinType)); - rivers.back()->activate(csb); - } - else - { - if (freeStreams.hasData()) - { - // Deactivate streams from rivers on stack, because - // the remaining streams don't have any indexed relationship with them - for (const auto river : rivers) - river->deactivate(csb); + rivers.add(FB_NEW_POOL(getPool()) CrossJoin(this, rivers, JoinType::INNER)); + rivers.back()->activate(csb); } + else + { + if (freeStreams.hasData()) + { + // Deactivate streams from rivers on stack, because + // the remaining streams don't have any indexed relationship with them + for (const auto river : rivers) + river->deactivate(csb); + } - break; + break; + } } } @@ -1053,10 +1052,12 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) if (rivers.isEmpty() && dependentRivers.isEmpty()) { // 
This case may look weird, but it's possible for recursive unions - rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, 0, nullptr, joinType); + rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, JoinType::INNER, 0, nullptr); } else { + auto joinType = JoinType::INNER; + while (rivers.hasData() || dependentRivers.hasData()) { // Re-activate remaining rivers to be hashable/mergeable @@ -1078,27 +1079,24 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) rsb = finalRiver->getRecordSource(); cardinality = rsb->getCardinality(); - if (specialSubQueries.hasData()) + if (specialRse) { - fb_assert(joinType == INNER_JOIN); - joinType = SEMI_JOIN; + fb_assert(joinType == JoinType::INNER); + joinType = rse->isSemiJoin() ? JoinType::SEMI : JoinType::ANTI; rivers.add(finalRiver); - for (const auto rse : specialSubQueries) - { - const auto sub = rse->compile(tdbb, this, true); - fb_assert(sub); + const auto sub = specialRse->compile(tdbb, this, true); + fb_assert(sub); - StreamList localStreams; - sub->findUsedStreams(localStreams); + StreamList localStreams; + sub->findUsedStreams(localStreams); - const auto subRiver = FB_NEW_POOL(getPool()) River(csb, sub, rse, localStreams); - auto& list = subRiver->isDependent(*finalRiver) ? dependentRivers : rivers; - list.add(subRiver); - } + const auto subRiver = FB_NEW_POOL(getPool()) River(csb, sub, specialRse, localStreams); + auto& list = subRiver->isDependent(*finalRiver) ? dependentRivers : rivers; + list.add(subRiver); - specialSubQueries.clear(); + specialRse = nullptr; } } } @@ -2016,7 +2014,7 @@ void Optimizer::checkSorts() // Walk trough the relations of the RSE and see if a // matching stream can be found. 
- if (newRse->rse_jointype == blr_inner) + if (newRse->isInnerJoin()) { if (newRse->rse_relations.getCount() == 1) node = newRse->rse_relations[0]; @@ -2389,7 +2387,7 @@ void Optimizer::formRivers(const StreamList& streams, bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) { - fb_assert(joinType != OUTER_JOIN); + fb_assert(joinType != JoinType::OUTER); ULONG selected_rivers[OPT_STREAM_BITS], selected_rivers2[OPT_STREAM_BITS]; ValueExprNode** eq_class; @@ -2576,7 +2574,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) // If any of to-be-hashed rivers is too large to be hashed efficiently, // then prefer a merge join instead of a hash join. - const bool useMergeJoin = hashOverflow; + const bool useMergeJoin = hashOverflow && (joinType == JoinType::INNER); // Build a join stream @@ -2584,7 +2582,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) RecordSource* finalRsb = nullptr; // MERGE JOIN does not support other join types yet - if (useMergeJoin && joinType == INNER_JOIN) + if (useMergeJoin && joinType == JoinType::INNER) { position = 0; for (const auto river : joinedRivers) @@ -2611,7 +2609,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) } else { - if (joinType == INNER_JOIN) + if (joinType == JoinType::INNER) { // Ensure that the largest river is placed at the first position. // It's important for a hash join to be efficient. 
diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index 9566b4b3882..c7478db1ef1 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -316,7 +316,8 @@ class Optimizer : public Firebird::PermanentStorage iter->flags = 0; } - // Assignment is not currently used in the code and I doubt it should be + ConjunctIterator() = delete; + ConjunctIterator(const ConjunctIterator& other) = delete; ConjunctIterator& operator=(const ConjunctIterator& other) = delete; private: @@ -329,10 +330,6 @@ class Optimizer : public Firebird::PermanentStorage { rewind(); } - - ConjunctIterator(const ConjunctIterator& other) - : begin(other.begin), end(other.end), iter(other.iter) - {} }; ConjunctIterator getBaseConjuncts() @@ -481,17 +478,27 @@ class Optimizer : public Firebird::PermanentStorage bool isInnerJoin() const { - return (rse->rse_jointype == blr_inner); + return rse->isInnerJoin(); + } + + bool isOuterJoin() const + { + return rse->isOuterJoin(); } bool isLeftJoin() const { - return (rse->rse_jointype == blr_left); + return rse->isLeftJoin(); } bool isFullJoin() const { - return (rse->rse_jointype == blr_full); + return rse->isFullJoin(); + } + + bool isSpecialJoin() const + { + return rse->isSpecialJoin(); } const StreamList& getOuterStreams() const @@ -519,11 +526,6 @@ class Optimizer : public Firebird::PermanentStorage return composeBoolean(iter, selectivity); } - bool isSemiJoined() const - { - return (rse->flags & RseNode::FLAG_SEMI_JOINED) != 0; - } - bool checkEquiJoin(BoolExprNode* boolean); bool getEquiJoinKeys(BoolExprNode* boolean, NestConst* node1, @@ -549,7 +551,7 @@ class Optimizer : public Firebird::PermanentStorage RiverList& rivers, SortNode** sortClause, const PlanNode* planClause); - bool generateEquiJoin(RiverList& rivers, JoinType joinType = INNER_JOIN); + bool generateEquiJoin(RiverList& rivers, JoinType joinType); void generateInnerJoin(StreamList& streams, RiverList& rivers, SortNode** sortClause, diff 
--git a/src/jrd/par.cpp b/src/jrd/par.cpp index ef08ba112b2..aa73821ab40 100644 --- a/src/jrd/par.cpp +++ b/src/jrd/par.cpp @@ -1291,8 +1291,7 @@ RseNode* PAR_rse(thread_db* tdbb, CompilerScratch* csb, SSHORT rse_op) case blr_join_type: { - const USHORT jointype = (USHORT) csb->csb_blr_reader.getByte(); - rse->rse_jointype = jointype; + const auto jointype = csb->csb_blr_reader.getByte(); if (jointype != blr_inner && jointype != blr_left && jointype != blr_right && @@ -1300,6 +1299,7 @@ RseNode* PAR_rse(thread_db* tdbb, CompilerScratch* csb, SSHORT rse_op) { PAR_syntax_error(csb, "join type clause"); } + rse->rse_jointype = jointype; break; } @@ -1341,7 +1341,7 @@ RseNode* PAR_rse(thread_db* tdbb, CompilerScratch* csb, SSHORT rse_op) // An outer join is only allowed when the stream count is 2 // and a boolean expression has been supplied - if (rse->rse_jointype == blr_inner || + if (rse->isInnerJoin() || (rse->rse_relations.getCount() == 2 && rse->rse_boolean)) { // Convert right outer joins to left joins to avoid diff --git a/src/jrd/recsrc/FullOuterJoin.cpp b/src/jrd/recsrc/FullOuterJoin.cpp index ca62d6a5a52..d5de4ffab86 100644 --- a/src/jrd/recsrc/FullOuterJoin.cpp +++ b/src/jrd/recsrc/FullOuterJoin.cpp @@ -40,7 +40,7 @@ using namespace Jrd; FullOuterJoin::FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, const StreamList& checkStreams) - : RecordSource(csb), + : Join(csb, JoinType::OUTER), m_arg1(arg1), m_arg2(arg2), m_checkStreams(csb->csb_pool, checkStreams) diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index 3ba9941c8ca..a7ed287f720 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -252,8 +252,7 @@ class HashJoin::HashTable : public PermanentStorage HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity) - : RecordSource(csb), - m_joinType(joinType), + : Join(csb, 
joinType), m_boolean(nullptr), m_args(csb->csb_pool, count - 1) { @@ -266,8 +265,7 @@ HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, BoolExprNode* boolean, RecordSource* const* args, NestValueArray* const* keys, double selectivity) - : RecordSource(csb), - m_joinType(OUTER_JOIN), + : Join(csb, JoinType::OUTER), m_boolean(boolean), m_args(csb->csb_pool, 1) { @@ -474,10 +472,10 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!impure->irsb_hash_table->setup(impure->irsb_leader_hash)) { - if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + if (m_joinType == JoinType::INNER || m_joinType == JoinType::SEMI) continue; - if (m_joinType == OUTER_JOIN) + if (m_joinType == JoinType::OUTER) inner->nullRecords(tdbb); return true; @@ -506,20 +504,20 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const { impure->irsb_flags |= irsb_mustread; - if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + if (m_joinType == JoinType::INNER || m_joinType == JoinType::SEMI) continue; - if (m_joinType == OUTER_JOIN) + if (m_joinType == JoinType::OUTER) inner->nullRecords(tdbb); break; } - if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::SEMI || m_joinType == JoinType::ANTI) { impure->irsb_flags |= irsb_mustread; - if (m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::ANTI) continue; } @@ -527,7 +525,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const } else if (!fetchRecord(tdbb, impure, m_args.getCount() - 1)) { - fb_assert(m_joinType == INNER_JOIN); + fb_assert(m_joinType == JoinType::INNER); impure->irsb_flags |= irsb_mustread; continue; } @@ -568,29 +566,7 @@ void HashJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned l { planEntry.className = "HashJoin"; - planEntry.lines.add().text = "Hash Join "; - - switch (m_joinType) - { - case INNER_JOIN: - planEntry.lines.back().text += "(inner)"; - break; - - case OUTER_JOIN: - planEntry.lines.back().text += "(outer)"; - break; - - 
case SEMI_JOIN: - planEntry.lines.back().text += "(semi)"; - break; - - case ANTI_JOIN: - planEntry.lines.back().text += "(anti)"; - break; - - default: - fb_assert(false); - } + planEntry.lines.add().text = "Hash Join " + printType(); string extras; extras.printf(" (keys: %" ULONGFORMAT", total key length: %" ULONGFORMAT")", @@ -751,7 +727,7 @@ bool HashJoin::fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) co return true; } - if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::SEMI || m_joinType == JoinType::ANTI) return false; while (true) diff --git a/src/jrd/recsrc/MergeJoin.cpp b/src/jrd/recsrc/MergeJoin.cpp index de3bf491a6d..a66bb254a1f 100644 --- a/src/jrd/recsrc/MergeJoin.cpp +++ b/src/jrd/recsrc/MergeJoin.cpp @@ -38,7 +38,7 @@ static const char* const SCRATCH = "fb_merge_"; MergeJoin::MergeJoin(CompilerScratch* csb, FB_SIZE_T count, SortedStream* const* args, const NestValueArray* const* keys) - : RecordSource(csb), + : Join(csb, JoinType::INNER), m_args(csb->csb_pool), m_keys(csb->csb_pool) { diff --git a/src/jrd/recsrc/NestedLoopJoin.cpp b/src/jrd/recsrc/NestedLoopJoin.cpp index 484cf54d0eb..d3f7781a0ac 100644 --- a/src/jrd/recsrc/NestedLoopJoin.cpp +++ b/src/jrd/recsrc/NestedLoopJoin.cpp @@ -35,12 +35,9 @@ using namespace Jrd; // Data access: nested loops join // ------------------------------ -NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, - FB_SIZE_T count, - RecordSource* const* args, - JoinType joinType) - : RecordSource(csb), - m_joinType(joinType), +NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, JoinType joinType, + FB_SIZE_T count, RecordSource* const* args) + : Join(csb, joinType), m_boolean(nullptr), m_args(csb->csb_pool, count) { @@ -57,8 +54,7 @@ NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, BoolExprNode* boolean) - : RecordSource(csb), - m_joinType(OUTER_JOIN), + : Join(csb, 
JoinType::OUTER), m_boolean(boolean), m_args(csb->csb_pool, 2) { @@ -107,7 +103,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const if (!(impure->irsb_flags & irsb_open)) return false; - if (m_joinType == INNER_JOIN) + if (m_joinType == JoinType::INNER) { if (impure->irsb_flags & irsb_first) { @@ -129,7 +125,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const else if (!fetchRecord(tdbb, m_args.getCount() - 1)) return false; } - else if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + else if (m_joinType == JoinType::SEMI || m_joinType == JoinType::ANTI) { const auto outer = m_args[0]; @@ -161,7 +157,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const if (m_args[i]->getRecord(tdbb)) { - if (m_joinType == ANTI_JOIN) + if (m_joinType == JoinType::ANTI) { stopArg = i; break; @@ -169,7 +165,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const } else { - if (m_joinType == SEMI_JOIN) + if (m_joinType == JoinType::SEMI) { stopArg = i; break; @@ -188,7 +184,7 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const } else { - fb_assert(m_joinType == OUTER_JOIN); + fb_assert(m_joinType == JoinType::OUTER); fb_assert(m_args.getCount() == 2); const auto outer = m_args[0]; @@ -272,30 +268,7 @@ void NestedLoopJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsi { planEntry.className = "NestedLoopJoin"; - planEntry.lines.add().text = "Nested Loop Join "; - - switch (m_joinType) - { - case INNER_JOIN: - planEntry.lines.back().text += "(inner)"; - break; - - case OUTER_JOIN: - planEntry.lines.back().text += "(outer)"; - break; - - case SEMI_JOIN: - planEntry.lines.back().text += "(semi)"; - break; - - case ANTI_JOIN: - planEntry.lines.back().text += "(anti)"; - break; - - default: - fb_assert(false); - } - + planEntry.lines.add().text = "Nested Loop Join " + printType(); printOptInfo(planEntry.lines); if (recurse) @@ -344,7 +317,7 @@ void NestedLoopJoin::nullRecords(thread_db* tdbb) const bool 
NestedLoopJoin::fetchRecord(thread_db* tdbb, FB_SIZE_T n) const { - fb_assert(m_joinType == INNER_JOIN); + fb_assert(m_joinType == JoinType::INNER); const RecordSource* const arg = m_args[n]; diff --git a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index f7c59497e1b..79e9a9310e6 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -53,7 +53,7 @@ namespace Jrd class BufferedStream; class PlanEntry; - enum JoinType { INNER_JOIN, OUTER_JOIN, SEMI_JOIN, ANTI_JOIN }; + enum class JoinType { INNER, OUTER, SEMI, ANTI }; // Common base for record sources, sub-queries and cursors. class AccessPath @@ -1154,11 +1154,45 @@ namespace Jrd // Multiplexing (many -> one) access methods - class NestedLoopJoin : public RecordSource + class Join : public RecordSource { public: - NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args, - JoinType joinType = INNER_JOIN); + Join(CompilerScratch* csb, JoinType joinType) + : RecordSource(csb), m_joinType(joinType) + {} + + const Firebird::string printType() const + { + switch (m_joinType) + { + case JoinType::INNER: + return "(inner)"; + + case JoinType::OUTER: + return "(outer)"; + + case JoinType::SEMI: + return "(semi)"; + + case JoinType::ANTI: + return "(anti)"; + + default: + fb_assert(false); + } + + return ""; + } + + protected: + const JoinType m_joinType; + }; + + class NestedLoopJoin : public Join + { + public: + NestedLoopJoin(CompilerScratch* csb, JoinType joinType, + FB_SIZE_T count, RecordSource* const* args); NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, BoolExprNode* boolean); @@ -1184,13 +1218,11 @@ namespace Jrd private: bool fetchRecord(thread_db*, FB_SIZE_T) const; - const JoinType m_joinType; const NestConst m_boolean; - Firebird::Array > m_args; }; - class FullOuterJoin : public RecordSource + class FullOuterJoin : public Join { public: FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, @@ 
-1221,7 +1253,7 @@ namespace Jrd const StreamList m_checkStreams; }; - class HashJoin : public RecordSource + class HashJoin : public Join { class HashTable; @@ -1283,14 +1315,13 @@ namespace Jrd const SubStream& sub, UCHAR* buffer) const; bool fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) const; - const JoinType m_joinType; const NestConst m_boolean; SubStream m_leader; Firebird::Array m_args; }; - class MergeJoin : public RecordSource + class MergeJoin : public Join { struct MergeFile { From 9a36aea42221f6240ff5ed38400957ff18d2bef8 Mon Sep 17 00:00:00 2001 From: dimitr Date: Mon, 21 Apr 2025 19:36:24 +0300 Subject: [PATCH 2/5] Postfixes for special joins --- src/jrd/RecordSourceNodes.cpp | 7 +++++-- src/jrd/optimizer/Optimizer.cpp | 37 +++++++-------------------------- src/jrd/optimizer/Optimizer.h | 21 +++++++++++++++++-- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/jrd/RecordSourceNodes.cpp b/src/jrd/RecordSourceNodes.cpp index fea341d9775..67ec65fbddc 100644 --- a/src/jrd/RecordSourceNodes.cpp +++ b/src/jrd/RecordSourceNodes.cpp @@ -3469,7 +3469,7 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr // Pass RseNode boolean only to inner substreams because join condition // should never exclude records from outer substreams - if (opt->isInnerJoin() || (opt->isLeftJoin() && innerSubStream)) + if (opt->isInnerJoin() || ((opt->isLeftJoin() || opt->isSpecialJoin()) && innerSubStream)) { // AB: For an (X LEFT JOIN Y) mark the outer-streams (X) as // active because the inner-streams (Y) are always "dependent" @@ -3477,7 +3477,7 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr // // dimitr: the same for lateral derived tables in inner joins - if (opt->isLeftJoin() || isLateral()) + if (!opt->isInnerJoin() || isLateral()) stateHolder.activate(); // For the LEFT JOIN, push all conjuncts except "missing" ones (e.g. 
IS NULL) @@ -3486,6 +3486,9 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr if (iter->containsAnyStream(rseStreams)) conjunctStack.push(iter); } + + if (opt->isSpecialJoin() && !opt->deliverJoinConjuncts(conjunctStack)) + conjunctStack.clear(); } else { diff --git a/src/jrd/optimizer/Optimizer.cpp b/src/jrd/optimizer/Optimizer.cpp index 9957596e2b7..819dc120777 100644 --- a/src/jrd/optimizer/Optimizer.cpp +++ b/src/jrd/optimizer/Optimizer.cpp @@ -591,11 +591,10 @@ namespace // Optimizer::Optimizer(thread_db* aTdbb, CompilerScratch* aCsb, RseNode* aRse, - bool parentFirstRows, double parentCardinality) + bool parentFirstRows) : PermanentStorage(*aTdbb->getDefaultPool()), tdbb(aTdbb), csb(aCsb), rse(aRse), firstRows(rse->firstRows.valueOr(parentFirstRows)), - cardinality(parentCardinality), compileStreams(getPool()), bedStreams(getPool()), keyStreams(getPool()), @@ -651,7 +650,7 @@ RecordSource* Optimizer::compile(RseNode* subRse, BoolExprNodeStack* parentStack // if we're going to sort/aggregate the resultset afterwards const bool subFirstRows = firstRows && !rse->rse_sorted && !rse->rse_aggregate; - Optimizer subOpt(tdbb, csb, subRse, subFirstRows, cardinality); + Optimizer subOpt(tdbb, csb, subRse, subFirstRows); const auto rsb = subOpt.compile(parentStack); if (parentStack && subOpt.isInnerJoin()) @@ -702,31 +701,11 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) conjunctCount += distributeEqualities(conjunctStack, conjunctCount); - if (parentStack) - { - // AB: If we have limit our retrieval with FIRST / SKIP syntax then - // we may not deliver above conditions (from higher rse's) to this - // rse, because the results should be consistent. - if (rse->rse_skip || rse->rse_first) - parentStack = nullptr; - - if (isSpecialJoin() && parentStack->hasData()) - { - // We have a semi-join, look at the parent (priorly joined streams) cardinality. 
- // If it's known to be not very small, nullify the parent conjuncts - // to give up a possible nested loop join in favor of a hash join. - // Here we assume every equi-join condition having a default selectivity (0.1). - // TODO: replace with a proper cost-based decision in the future. - - double subSelectivity = MAXIMUM_SELECTIVITY; - for (auto count = parentStack->getCount(); count; count--) - subSelectivity *= DEFAULT_SELECTIVITY; - const auto thresholdCardinality = MINIMUM_CARDINALITY / subSelectivity; - - if (!cardinality || cardinality > thresholdCardinality) - parentStack = nullptr; - } - } + // AB: If we have limit our retrieval with FIRST / SKIP syntax then + // we may not deliver above conditions (from higher rse's) to this + // rse, because the results should be consistent. + if (rse->rse_skip || rse->rse_first) + parentStack = nullptr; // Set base-point before the parent/distributed nodes begin. const unsigned baseCount = conjunctCount; @@ -905,7 +884,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) bool computable = false; // AB: Save all outer-part streams - if (isInnerJoin() || (isLeftJoin() && !innerSubStream)) + if (isInnerJoin() || ((isLeftJoin() || isSpecialJoin()) && !innerSubStream)) { if (node->computable(csb, INVALID_STREAM, false)) computable = true; diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index c7478db1ef1..a4d415ae4f9 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -438,6 +438,23 @@ class Optimizer : public Firebird::PermanentStorage selectivity = minSelectivity + diffSelectivity * factor; } + bool deliverJoinConjuncts(const BoolExprNodeStack& conjuncts) + { + fb_assert(conjuncts.hasData()); + + // Look at cardinality of the priorly joined streams. If it's known to be + // not very small, give up a possible nested loop join in favor of a hash join. + // Here we assume every equi-join condition having a default selectivity (0.1). 
+ // TODO: replace with a proper cost-based decision in the future. + + double subSelectivity = MAXIMUM_SELECTIVITY; + for (auto count = conjuncts.getCount(); count; count--) + subSelectivity *= DEFAULT_SELECTIVITY; + const auto thresholdCardinality = MINIMUM_CARDINALITY / subSelectivity; + + return (cardinality && cardinality <= thresholdCardinality); + } + static RecordSource* compile(thread_db* tdbb, CompilerScratch* csb, RseNode* rse) { bool firstRows = false; @@ -452,7 +469,7 @@ class Optimizer : public Firebird::PermanentStorage firstRows = attachment->att_opt_first_rows.valueOr(defaultFirstRows); } - return Optimizer(tdbb, csb, rse, firstRows, 0).compile(nullptr); + return Optimizer(tdbb, csb, rse, firstRows).compile(nullptr); } ~Optimizer(); @@ -537,7 +554,7 @@ class Optimizer : public Firebird::PermanentStorage private: Optimizer(thread_db* aTdbb, CompilerScratch* aCsb, RseNode* aRse, - bool parentFirstRows, double parentCardinality); + bool parentFirstRows); RecordSource* compile(BoolExprNodeStack* parentStack); From 8332d4ffa98f5308057c77c119cba43e2106e56e Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Wed, 18 Jun 2025 21:11:11 +0300 Subject: [PATCH 3/5] Follow Adriano's suggestion --- src/jrd/RecordSourceNodes.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/jrd/RecordSourceNodes.h b/src/jrd/RecordSourceNodes.h index 0852eeb71b6..e26105be6a5 100644 --- a/src/jrd/RecordSourceNodes.h +++ b/src/jrd/RecordSourceNodes.h @@ -717,19 +717,14 @@ class RseNode final : public TypedNode Date: Wed, 18 Jun 2025 21:11:58 +0300 Subject: [PATCH 4/5] Further refactoring of the joins: move common code into the base class --- src/jrd/recsrc/FullOuterJoin.cpp | 87 ++++------------- src/jrd/recsrc/HashJoin.cpp | 129 +++++-------------------- src/jrd/recsrc/MergeJoin.cpp | 97 +++---------------- src/jrd/recsrc/NestedLoopJoin.cpp | 75 ++------------- src/jrd/recsrc/RecordSource.h | 153 ++++++++++++++++++------------ 5 files
changed, 156 insertions(+), 385 deletions(-) diff --git a/src/jrd/recsrc/FullOuterJoin.cpp b/src/jrd/recsrc/FullOuterJoin.cpp index d5de4ffab86..8a828a97629 100644 --- a/src/jrd/recsrc/FullOuterJoin.cpp +++ b/src/jrd/recsrc/FullOuterJoin.cpp @@ -40,15 +40,16 @@ using namespace Jrd; FullOuterJoin::FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, const StreamList& checkStreams) - : Join(csb, JoinType::OUTER), - m_arg1(arg1), - m_arg2(arg2), + : Join(csb, 2, JoinType::OUTER), m_checkStreams(csb->csb_pool, checkStreams) { - fb_assert(m_arg1 && m_arg2); + fb_assert(arg1 && arg2); m_impure = csb->allocImpure(); m_cardinality = arg1->getCardinality() + arg2->getCardinality(); + + m_args.add(arg1); + m_args.add(arg2); } void FullOuterJoin::internalOpen(thread_db* tdbb) const @@ -58,25 +59,22 @@ void FullOuterJoin::internalOpen(thread_db* tdbb) const impure->irsb_flags = irsb_open | irsb_first; - m_arg1->open(tdbb); + m_args[0]->open(tdbb); } void FullOuterJoin::close(thread_db* tdbb) const { - Request* const request = tdbb->getRequest(); + const auto request = tdbb->getRequest(); invalidateRecords(request); - Impure* const impure = request->getImpure(m_impure); + const auto impure = request->getImpure(m_impure); if (impure->irsb_flags & irsb_open) { impure->irsb_flags &= ~irsb_open; - if (impure->irsb_flags & irsb_first) - m_arg1->close(tdbb); - else - m_arg2->close(tdbb); + Join::close(tdbb); } } @@ -84,26 +82,29 @@ bool FullOuterJoin::internalGetRecord(thread_db* tdbb) const { JRD_reschedule(tdbb); - Request* const request = tdbb->getRequest(); - Impure* const impure = request->getImpure(m_impure); + const auto request = tdbb->getRequest(); + const auto impure = request->getImpure(m_impure); if (!(impure->irsb_flags & irsb_open)) return false; + const auto arg1 = m_args[0]; + const auto arg2 = m_args[1]; + if (impure->irsb_flags & irsb_first) { - if (m_arg1->getRecord(tdbb)) + if (arg1->getRecord(tdbb)) return true; impure->irsb_flags &= 
~irsb_first; - m_arg1->close(tdbb); - m_arg2->open(tdbb); + arg1->close(tdbb); + arg2->open(tdbb); } // We should exclude matching records from the right-joined (second) record source, // as they're already returned from the left-joined (first) record source - while (m_arg2->getRecord(tdbb)) + while (arg2->getRecord(tdbb)) { bool matched = false; @@ -123,25 +124,11 @@ bool FullOuterJoin::internalGetRecord(thread_db* tdbb) const return false; } -bool FullOuterJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult FullOuterJoin::lockRecord(thread_db* tdbb) const -{ - SET_TDBB(tdbb); - - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void FullOuterJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { level++; plan += "JOIN ("; - m_arg1->getLegacyPlan(tdbb, plan, level); - plan += ", "; - m_arg2->getLegacyPlan(tdbb, plan, level); + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } @@ -152,39 +139,5 @@ void FullOuterJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsig planEntry.lines.add().text = "Full Outer Join"; printOptInfo(planEntry.lines); - if (recurse) - { - ++level; - m_arg1->getPlan(tdbb, planEntry.children.add(), level, recurse); - m_arg2->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void FullOuterJoin::markRecursive() -{ - m_arg1->markRecursive(); - m_arg2->markRecursive(); -} - -void FullOuterJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - m_arg1->findUsedStreams(streams, expandAll); - m_arg2->findUsedStreams(streams, expandAll); -} - -bool FullOuterJoin::isDependent(const StreamList& streams) const -{ - return m_arg1->isDependent(streams) || m_arg2->isDependent(streams); -} - -void FullOuterJoin::invalidateRecords(Request* request) const -{ - m_arg1->invalidateRecords(request); - m_arg2->invalidateRecords(request); -} - -void FullOuterJoin::nullRecords(thread_db* tdbb) const -{ - m_arg1->nullRecords(tdbb); - 
m_arg2->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index a7ed287f720..4dcf757a3cc 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -252,9 +252,8 @@ class HashJoin::HashTable : public PermanentStorage HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity) - : Join(csb, joinType), - m_boolean(nullptr), - m_args(csb->csb_pool, count - 1) + : Join(csb, count, joinType), + m_subs(csb->csb_pool, count - 1) { fb_assert(count >= 2); @@ -265,9 +264,8 @@ HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, BoolExprNode* boolean, RecordSource* const* args, NestValueArray* const* keys, double selectivity) - : Join(csb, JoinType::OUTER), - m_boolean(boolean), - m_args(csb->csb_pool, 1) + : Join(csb, 2, JoinType::OUTER, boolean), + m_subs(csb->csb_pool, 1) { init(tdbb, csb, 2, args, keys, selectivity); } @@ -278,6 +276,7 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, { m_impure = csb->allocImpure(); + m_args.add(args[0]); m_leader.source = args[0]; m_leader.keys = keys[0]; const FB_SIZE_T leaderKeyCount = m_leader.keys->getCount(); @@ -312,13 +311,14 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, for (FB_SIZE_T i = 1; i < count; i++) { - RecordSource* const sub_rsb = args[i]; - fb_assert(sub_rsb); + const auto subRsb = args[i]; + fb_assert(subRsb); - m_cardinality *= sub_rsb->getCardinality(); + m_args.add(subRsb); + m_cardinality *= subRsb->getCardinality(); SubStream sub; - sub.buffer = FB_NEW_POOL(csb->csb_pool) BufferedStream(csb, sub_rsb); + sub.buffer = FB_NEW_POOL(csb->csb_pool) BufferedStream(csb, subRsb); sub.keys = keys[i]; const FB_SIZE_T subKeyCount = sub.keys->getCount(); sub.keyLengths = FB_NEW_POOL(csb->csb_pool) ULONG[subKeyCount]; @@ -348,15 +348,11 @@ 
void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, sub.totalKeyLength += keyLength; } - m_args.add(sub); + m_subs.add(sub); } if (!selectivity) - { - selectivity = MAXIMUM_SELECTIVITY; - while (keyCount--) - selectivity *= REDUCE_SELECTIVITY_FACTOR_EQUALITY; - } + selectivity = pow(REDUCE_SELECTIVITY_FACTOR_EQUALITY, keyCount); m_cardinality *= selectivity; } @@ -388,16 +384,13 @@ void HashJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; + Join::close(tdbb); + delete impure->irsb_hash_table; impure->irsb_hash_table = nullptr; delete[] impure->irsb_leader_buffer; impure->irsb_leader_buffer = nullptr; - - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - m_args[i].buffer->close(tdbb); - - m_leader.source->close(tdbb); } } @@ -411,7 +404,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!(impure->irsb_flags & irsb_open)) return false; - const auto inner = m_args.front().source; + const auto inner = m_subs.front().source; while (true) { @@ -435,7 +428,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!impure->irsb_hash_table && !impure->irsb_leader_buffer) { auto& pool = *tdbb->getDefaultPool(); - const auto argCount = m_args.getCount(); + const auto argCount = m_subs.getCount(); impure->irsb_hash_table = FB_NEW_POOL(pool) HashTable(pool, argCount); impure->irsb_leader_buffer = FB_NEW_POOL(pool) UCHAR[m_leader.totalKeyLength]; @@ -447,14 +440,14 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const // Read and cache the inner streams. While doing that, // hash the join condition values and populate hash tables. 
- m_args[i].buffer->open(tdbb); + m_subs[i].buffer->open(tdbb); ULONG counter = 0; - const auto keyBuffer = buffer.getBuffer(m_args[i].totalKeyLength, false); + const auto keyBuffer = buffer.getBuffer(m_subs[i].totalKeyLength, false); - while (m_args[i].buffer->getRecord(tdbb)) + while (m_subs[i].buffer->getRecord(tdbb)) { - const auto hash = computeHash(tdbb, request, m_args[i], keyBuffer); + const auto hash = computeHash(tdbb, request, m_subs[i], keyBuffer); impure->irsb_hash_table->put(i, hash, counter++); } } @@ -491,7 +484,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const { bool found = true; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) + for (FB_SIZE_T i = 0; i < m_subs.getCount(); i++) { if (!fetchRecord(tdbb, impure, i)) { @@ -523,7 +516,7 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const impure->irsb_flags &= ~irsb_first; } - else if (!fetchRecord(tdbb, impure, m_args.getCount() - 1)) + else if (!fetchRecord(tdbb, impure, m_subs.getCount() - 1)) { fb_assert(m_joinType == JoinType::INNER); impure->irsb_flags |= irsb_mustread; @@ -536,29 +529,11 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const return true; } -bool HashJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult HashJoin::lockRecord(thread_db* /*tdbb*/) const -{ - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void HashJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { level++; plan += "HASH ("; - m_leader.source->getLegacyPlan(tdbb, plan, level); - plan += ", "; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - { - if (i) - plan += ", "; - - m_args[i].source->getLegacyPlan(tdbb, plan, level); - } + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } @@ -576,61 +551,7 @@ void HashJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned l printOptInfo(planEntry.lines); - if (recurse) - { - ++level; - - m_leader.source->getPlan(tdbb, planEntry.children.add(), level, 
recurse); - - for (const auto& arg : m_args) - arg.source->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void HashJoin::markRecursive() -{ - m_leader.source->markRecursive(); - - for (const auto& arg : m_args) - arg.source->markRecursive(); -} - -void HashJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - m_leader.source->findUsedStreams(streams, expandAll); - - for (const auto& arg : m_args) - arg.source->findUsedStreams(streams, expandAll); -} - -bool HashJoin::isDependent(const StreamList& streams) const -{ - if (m_leader.source->isDependent(streams)) - return true; - - for (const auto& arg : m_args) - { - if (arg.source->isDependent(streams)) - return true; - } - - return (m_boolean && m_boolean->containsAnyStream(streams)); -} - -void HashJoin::invalidateRecords(Request* request) const -{ - m_leader.source->invalidateRecords(request); - - for (const auto& arg : m_args) - arg.source->invalidateRecords(request); -} - -void HashJoin::nullRecords(thread_db* tdbb) const -{ - m_leader.source->nullRecords(tdbb); - - for (const auto& arg : m_args) - arg.source->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } ULONG HashJoin::computeHash(thread_db* tdbb, @@ -716,7 +637,7 @@ bool HashJoin::fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) co { HashTable* const hashTable = impure->irsb_hash_table; - const BufferedStream* const arg = m_args[stream].buffer; + const BufferedStream* const arg = m_subs[stream].buffer; ULONG position; if (hashTable->iterate(stream, impure->irsb_leader_hash, position)) diff --git a/src/jrd/recsrc/MergeJoin.cpp b/src/jrd/recsrc/MergeJoin.cpp index a66bb254a1f..bc65abf0c60 100644 --- a/src/jrd/recsrc/MergeJoin.cpp +++ b/src/jrd/recsrc/MergeJoin.cpp @@ -38,32 +38,21 @@ static const char* const SCRATCH = "fb_merge_"; MergeJoin::MergeJoin(CompilerScratch* csb, FB_SIZE_T count, SortedStream* const* args, const NestValueArray* const* keys) - : Join(csb, JoinType::INNER), - 
m_args(csb->csb_pool), - m_keys(csb->csb_pool) + : Join(csb, count, JoinType::INNER), + m_keys(csb->csb_pool, count) { const size_t size = sizeof(struct Impure) + count * sizeof(Impure::irsb_mrg_repeat); m_impure = csb->allocImpure(FB_ALIGNMENT, static_cast(size)); m_cardinality = MINIMUM_CARDINALITY; - m_args.resize(count); - m_keys.resize(count); - for (FB_SIZE_T i = 0; i < count; i++) { - fb_assert(args[i]); - m_args[i] = args[i]; - - m_cardinality *= args[i]->getCardinality(); - if (i) - { - for (auto keyCount = keys[i]->getCount(); keyCount; keyCount--) - m_cardinality *= REDUCE_SELECTIVITY_FACTOR_EQUALITY; - } - - fb_assert(keys[i]); - m_keys[i] = keys[i]; + m_args.add(args[i]); + m_cardinality *= args[i]->getCardinality() * + pow(REDUCE_SELECTIVITY_FACTOR_EQUALITY, keys[i]->getCount()); } + + m_keys.add(keys, count); } void MergeJoin::internalOpen(thread_db* tdbb) const @@ -114,14 +103,12 @@ void MergeJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; + Join::close(tdbb); + for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) { Impure::irsb_mrg_repeat* const tail = &impure->irsb_mrg_rpt[i]; - // close all the substreams for the sort-merge - - m_args[i]->close(tdbb); - // Release memory associated with the merge file block and the sort file block. // Also delete the merge file if one exists. 
@@ -335,27 +322,11 @@ bool MergeJoin::internalGetRecord(thread_db* tdbb) const return true; } -bool MergeJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult MergeJoin::lockRecord(thread_db* /*tdbb*/) const -{ - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void MergeJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { level++; plan += "MERGE ("; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - { - if (i) - plan += ", "; - - m_args[i]->getLegacyPlan(tdbb, plan, level); - } + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } @@ -363,55 +334,17 @@ void MergeJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned { planEntry.className = "MergeJoin"; + planEntry.lines.add().text = "Merge Join " + printType(); + string extras; extras.printf(" (keys: %" ULONGFORMAT", total key length: %" ULONGFORMAT")", m_keys[0]->getCount(), m_args[0]->getKeyLength()); - planEntry.lines.add().text = "Merge Join (inner)" + extras; - printOptInfo(planEntry.lines); + planEntry.lines.back().text += extras; - if (recurse) - { - ++level; - - for (const auto arg : m_args) - arg->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void MergeJoin::markRecursive() -{ - for (auto arg : m_args) - arg->markRecursive(); -} - -void MergeJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - for (const auto arg : m_args) - arg->findUsedStreams(streams, expandAll); -} - -bool MergeJoin::isDependent(const StreamList& streams) const -{ - for (const auto arg : m_args) - { - if (arg->isDependent(streams)) - return true; - } - - return false; -} - -void MergeJoin::invalidateRecords(Request* request) const -{ - for (const auto arg : m_args) - arg->invalidateRecords(request); -} + printOptInfo(planEntry.lines); -void MergeJoin::nullRecords(thread_db* tdbb) const -{ - for (const auto arg : m_args) - arg->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } int 
MergeJoin::compare(thread_db* tdbb, const NestValueArray* node1, diff --git a/src/jrd/recsrc/NestedLoopJoin.cpp b/src/jrd/recsrc/NestedLoopJoin.cpp index d3f7781a0ac..cd6567fab92 100644 --- a/src/jrd/recsrc/NestedLoopJoin.cpp +++ b/src/jrd/recsrc/NestedLoopJoin.cpp @@ -37,9 +37,7 @@ using namespace Jrd; NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, JoinType joinType, FB_SIZE_T count, RecordSource* const* args) - : Join(csb, joinType), - m_boolean(nullptr), - m_args(csb->csb_pool, count) + : Join(csb, count, joinType) { m_impure = csb->allocImpure(); m_cardinality = MINIMUM_CARDINALITY; @@ -54,18 +52,15 @@ NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, JoinType joinType, NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, BoolExprNode* boolean) - : Join(csb, JoinType::OUTER), - m_boolean(boolean), - m_args(csb->csb_pool, 2) + : Join(csb, 2, JoinType::OUTER, boolean) { fb_assert(outer && inner); m_impure = csb->allocImpure(); + m_cardinality = outer->getCardinality() * inner->getCardinality(); m_args.add(outer); m_args.add(inner); - - m_cardinality = outer->getCardinality() * inner->getCardinality(); } void NestedLoopJoin::internalOpen(thread_db* tdbb) const @@ -88,8 +83,7 @@ void NestedLoopJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; - for (const auto arg : m_args) - arg->close(tdbb); + Join::close(tdbb); } } @@ -237,29 +231,13 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const return true; } -bool NestedLoopJoin::refetchRecord(thread_db* /*tdbb*/) const -{ - return true; -} - -WriteLockResult NestedLoopJoin::lockRecord(thread_db* /*tdbb*/) const -{ - status_exception::raise(Arg::Gds(isc_record_lock_not_supp)); -} - void NestedLoopJoin::getLegacyPlan(thread_db* tdbb, string& plan, unsigned level) const { if (m_args.hasData()) { level++; plan += "JOIN ("; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - { - if (i) - plan += ", "; - - m_args[i]->getLegacyPlan(tdbb, 
plan, level); - } + Join::getLegacyPlan(tdbb, plan, level); plan += ")"; } } @@ -271,48 +249,7 @@ void NestedLoopJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsi planEntry.lines.add().text = "Nested Loop Join " + printType(); printOptInfo(planEntry.lines); - if (recurse) - { - ++level; - - for (const auto arg : m_args) - arg->getPlan(tdbb, planEntry.children.add(), level, recurse); - } -} - -void NestedLoopJoin::markRecursive() -{ - for (auto arg : m_args) - arg->markRecursive(); -} - -void NestedLoopJoin::findUsedStreams(StreamList& streams, bool expandAll) const -{ - for (const auto arg : m_args) - arg->findUsedStreams(streams, expandAll); -} - -bool NestedLoopJoin::isDependent(const StreamList& streams) const -{ - for (const auto arg : m_args) - { - if (arg->isDependent(streams)) - return true; - } - - return (m_boolean && m_boolean->containsAnyStream(streams)); -} - -void NestedLoopJoin::invalidateRecords(Request* request) const -{ - for (const auto arg : m_args) - arg->invalidateRecords(request); -} - -void NestedLoopJoin::nullRecords(thread_db* tdbb) const -{ - for (const auto arg : m_args) - arg->nullRecords(tdbb); + Join::getPlan(tdbb, planEntry, level, recurse); } bool NestedLoopJoin::fetchRecord(thread_db* tdbb, FB_SIZE_T n) const diff --git a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index c4e7e4f2580..e1b8994f8a4 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -1153,12 +1153,94 @@ namespace Jrd // Multiplexing (many -> one) access methods + template class Join : public RecordSource { public: - Join(CompilerScratch* csb, JoinType joinType) - : RecordSource(csb), m_joinType(joinType) - {} + Join(CompilerScratch* csb, FB_SIZE_T count, JoinType joinType, BoolExprNode* boolean = nullptr) + : RecordSource(csb), m_joinType(joinType), m_boolean(boolean), + m_args(csb->csb_pool, count) + { + fb_assert(!m_boolean || m_joinType == JoinType::OUTER); + } + + virtual void close(thread_db* tdbb) 
const + { + for (const auto& arg : m_args) + arg->close(tdbb); + } + + bool refetchRecord(thread_db* /*tdbb*/) const override + { + return true; + } + + WriteLockResult lockRecord(thread_db* /*tdbb*/) const override + { + Firebird::status_exception::raise(Firebird::Arg::Gds(isc_record_lock_not_supp)); + } + + void markRecursive() override + { + for (auto& arg : m_args) + arg->markRecursive(); + } + + void findUsedStreams(StreamList& streams, bool expandAll) const override + { + for (const auto& arg : m_args) + arg->findUsedStreams(streams, expandAll); + } + + bool isDependent(const StreamList& streams) const override + { + for (const auto& arg : m_args) + { + if (arg->isDependent(streams)) + return true; + } + + return (m_boolean && m_boolean->containsAnyStream(streams)); + } + + void invalidateRecords(Request* request) const override + { + for (const auto& arg : m_args) + arg->invalidateRecords(request); + } + + void nullRecords(thread_db* tdbb) const override + { + for (const auto& arg : m_args) + arg->nullRecords(tdbb); + } + + protected: + const JoinType m_joinType; + const NestConst m_boolean; + Firebird::Array > m_args; + + void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const + { + for (const auto& arg : m_args) + { + if (arg != m_args.front()) + plan += ", "; + + arg->getLegacyPlan(tdbb, plan, level); + } + } + + void getPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const + { + if (recurse) + { + ++level; + + for (const auto& arg : m_args) + arg->getPlan(tdbb, planEntry.children.add(), level, recurse); + } + } const Firebird::string printType() const { @@ -1182,12 +1264,9 @@ namespace Jrd return ""; } - - protected: - const JoinType m_joinType; }; - class NestedLoopJoin : public Join + class NestedLoopJoin : public Join { public: NestedLoopJoin(CompilerScratch* csb, JoinType joinType, @@ -1196,19 +1275,8 @@ namespace Jrd BoolExprNode* boolean); void close(thread_db* tdbb) const override; - - bool 
refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - protected: void internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const override; void internalOpen(thread_db* tdbb) const override; @@ -1216,43 +1284,27 @@ namespace Jrd private: bool fetchRecord(thread_db*, FB_SIZE_T) const; - - const NestConst m_boolean; - Firebird::Array > m_args; }; - class FullOuterJoin : public Join + class FullOuterJoin : public Join { public: FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, const StreamList& checkStreams); void close(thread_db* tdbb) const override; - - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - protected: void internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const override; void internalOpen(thread_db* tdbb) const override; bool internalGetRecord(thread_db* tdbb) const override; private: - NestConst m_arg1; - NestConst m_arg2; const StreamList m_checkStreams; }; - class HashJoin : public Join + class HashJoin : public Join { class HashTable; @@ -1286,19 +1338,8 @@ namespace Jrd double 
selectivity = 0); void close(thread_db* tdbb) const override; - - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - static unsigned maxCapacity(); protected: @@ -1314,13 +1355,11 @@ namespace Jrd const SubStream& sub, UCHAR* buffer) const; bool fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) const; - const NestConst m_boolean; - SubStream m_leader; - Firebird::Array m_args; + Firebird::Array m_subs; }; - class MergeJoin : public Join + class MergeJoin : public Join { struct MergeFile { @@ -1356,19 +1395,8 @@ namespace Jrd const NestValueArray* const* keys); void close(thread_db* tdbb) const override; - - bool refetchRecord(thread_db* tdbb) const override; - WriteLockResult lockRecord(thread_db* tdbb) const override; - void getLegacyPlan(thread_db* tdbb, Firebird::string& plan, unsigned level) const override; - void markRecursive() override; - void invalidateRecords(Request* request) const override; - - void findUsedStreams(StreamList& streams, bool expandAll = false) const override; - bool isDependent(const StreamList& streams) const override; - void nullRecords(thread_db* tdbb) const override; - protected: void internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned level, bool recurse) const override; void internalOpen(thread_db* tdbb) const override; @@ -1381,7 +1409,6 @@ namespace Jrd SLONG getRecordByIndex(thread_db* tdbb, FB_SIZE_T index) const; bool fetchRecord(thread_db* tdbb, FB_SIZE_T index) const; - Firebird::Array > m_args; Firebird::Array m_keys; }; From 
6f30f1992aff5492997c70d7b3dc475bed835abb Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Wed, 18 Jun 2025 21:18:15 +0300 Subject: [PATCH 5/5] Correction --- src/jrd/recsrc/HashJoin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index 4dcf757a3cc..a00a239ed5f 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -276,7 +276,6 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, { m_impure = csb->allocImpure(); - m_args.add(args[0]); m_leader.source = args[0]; m_leader.keys = keys[0]; const FB_SIZE_T leaderKeyCount = m_leader.keys->getCount(); @@ -284,6 +283,7 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, m_leader.totalKeyLength = 0; m_cardinality = m_leader.source->getCardinality(); + m_args.add(m_leader.source); for (FB_SIZE_T j = 0; j < leaderKeyCount; j++) { @@ -314,7 +314,6 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, const auto subRsb = args[i]; fb_assert(subRsb); - m_args.add(subRsb); m_cardinality *= subRsb->getCardinality(); SubStream sub; @@ -349,6 +348,7 @@ void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, } m_subs.add(sub); + m_args.add(sub.buffer); } if (!selectivity)