Skip to content

Commit 7379d61

Browse files
alyacb authored and MongoDB Bot committed
SERVER-99617 Boost DISTINCT_SCAN productivity score (#31618)
GitOrigin-RevId: 5d7295b
1 parent 00df2da commit 7379d61

File tree

5 files changed

+110
-14
lines changed

5 files changed

+110
-14
lines changed

src/mongo/db/exec/multi_plan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
267267

268268
// After picking best plan, ranking will own plan stats from candidate solutions (winner and
269269
// losers).
270-
auto statusWithRanking = plan_ranker::pickBestPlan(_candidates);
270+
auto statusWithRanking = plan_ranker::pickBestPlan(_candidates, *_query);
271271
if (!statusWithRanking.isOK()) {
272272
return statusWithRanking.getStatus();
273273
}

src/mongo/db/query/plan_ranker.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ void logScoreFormula(std::function<std::string()> formula,
5757
double noFetchBonus,
5858
double noSortBonus,
5959
double noIxisectBonus,
60-
double tieBreakers) {
60+
double tieBreakers,
61+
boost::optional<double> groupByDistinctBonus) {
6162
LOGV2_DEBUG(
6263
20961, 2, "Score formula", "formula"_attr = [&]() {
6364
StringBuilder sb;
@@ -69,6 +70,9 @@ void logScoreFormula(std::function<std::string()> formula,
6970
<< " noFetchBonus + " << str::convertDoubleToString(noSortBonus) << " noSortBonus + "
7071
<< str::convertDoubleToString(noIxisectBonus)
7172
<< " noIxisectBonus = " << str::convertDoubleToString(tieBreakers) << ")";
73+
if (groupByDistinctBonus) {
74+
sb << " + groupByDistinctBonus(" << *groupByDistinctBonus << ")";
75+
}
7276
return sb.str();
7377
}());
7478
}
@@ -77,6 +81,14 @@ void logScoreBoost(double score) {
7781
LOGV2_DEBUG(20962, 5, "Score boosted due to intersection forcing", "newScore"_attr = score);
7882
}
7983

84+
void logScoreGroupByDistinctBoost(double bonus) {
85+
LOGV2_DEBUG(
86+
9961700,
87+
5,
88+
"Adding groupByDistinctBonus, boost formula is: std::min(1 - productivity, productivity)",
89+
"groupByDistinctBonus"_attr = bonus);
90+
}
91+
8092
void logScoringPlan(std::function<std::string()> solution,
8193
std::function<std::string()> explain,
8294
std::function<std::string()> planSummary,

src/mongo/db/query/plan_ranker.h

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ void logScoreFormula(std::function<std::string()> formula,
6969
double noFetchBonus,
7070
double noSortBonus,
7171
double noIxisectBonus,
72-
double tieBreakers);
72+
double tieBreakers,
73+
boost::optional<double> groupByDistinctBonus);
7374
void logScoreBoost(double score);
75+
void logScoreGroupByDistinctBoost(double bonus);
7476
void logScoringPlan(std::function<std::string()> solution,
7577
std::function<std::string()> explain,
7678
std::function<std::string()> planSummary,
@@ -103,7 +105,7 @@ class PlanScorer {
103105
PlanScorer() = default;
104106
virtual ~PlanScorer() = default;
105107

106-
double calculateScore(const PlanStageStatsType* stats) const {
108+
double calculateScore(const PlanStageStatsType* stats, const CanonicalQuery& cq) const {
107109
// We start all scores at 1. Our "no plan selected" score is 0 and we want all plans to
108110
// be greater than that.
109111
const double baseScore = 1;
@@ -139,7 +141,23 @@ class PlanScorer {
139141
}
140142

141143
const double tieBreakers = noFetchBonus + noSortBonus + noIxisectBonus;
142-
double score = baseScore + productivity + tieBreakers;
144+
boost::optional<double> groupByDistinctBonus;
145+
146+
// Apply a large bonus to DISTINCT_SCAN plans in an aggregation context, as the
147+
// $groupByDistinct rewrite can reduce the amount of overall work the query needs to do.
148+
if (cq.getExpCtx()->isFeatureFlagShardFilteringDistinctScanEnabled() && cq.getDistinct() &&
149+
!cq.cqPipeline().empty() && hasStage(STAGE_DISTINCT_SCAN, stats)) {
150+
// Assume that every advance in a distinct scan is twice as productive as the
151+
// equivalent index scan, up to the number of works actually done by the
152+
// distinct scan, in order to favor distinct scans. The maximum bonus is 0.5
153+
// (productivity = 0.5), while the minimum bonus is 0 (productivity = 0 or 1). If the
154+
// distinct scan is not very productive (< 0.5) we don't want to prioritize it
155+
// too much; conversely, if it is very productive, we don't need a huge bonus.
156+
groupByDistinctBonus = std::min(1 - productivity, productivity);
157+
log_detail::logScoreGroupByDistinctBoost(*groupByDistinctBonus);
158+
}
159+
160+
double score = baseScore + productivity + tieBreakers + groupByDistinctBonus.value_or(0.0);
143161

144162
log_detail::logScoreFormula([this, stats] { return getProductivityFormula(stats); },
145163
score,
@@ -148,7 +166,8 @@ class PlanScorer {
148166
noFetchBonus,
149167
noSortBonus,
150168
noIxisectBonus,
151-
tieBreakers);
169+
tieBreakers,
170+
groupByDistinctBonus);
152171

153172
if (internalQueryForceIntersectionPlans.load()) {
154173
if (hasStage(STAGE_AND_HASH, stats) || hasStage(STAGE_AND_SORTED, stats)) {

src/mongo/db/query/plan_ranker_test.cpp

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,19 @@
3131
* This file contains tests for mongo/db/query/plan_ranker.h
3232
*/
3333

34-
#include "mongo/db/query/plan_ranker.h"
35-
3634
#include <utility>
3735
#include <vector>
3836

37+
#include "mongo/bson/bsonelement.h"
38+
#include "mongo/bson/bsonmisc.h"
3939
#include "mongo/db/exec/plan_stats.h"
40+
#include "mongo/db/namespace_string.h"
41+
#include "mongo/db/pipeline/document_source_group.h"
42+
#include "mongo/db/pipeline/expression_context_for_test.h"
43+
#include "mongo/db/query/canonical_query.h"
4044
#include "mongo/db/query/plan_ranker_util.h"
45+
#include "mongo/db/query/stage_types.h"
46+
#include "mongo/idl/server_parameter_test_util.h"
4147
#include "mongo/unittest/assert.h"
4248
#include "mongo/unittest/framework.h"
4349

@@ -49,11 +55,21 @@ using std::make_unique;
4955
using std::string;
5056
using std::unique_ptr;
5157

58+
unique_ptr<CanonicalQuery> makeCanonicalQuery() {
59+
auto expCtx = new ExpressionContextForTest();
60+
auto findCommand = std::make_unique<FindCommandRequest>(NamespaceString());
61+
return std::make_unique<CanonicalQuery>(CanonicalQueryParams{
62+
.expCtx = expCtx, .parsedFind = ParsedFindCommandParams{std::move(findCommand)}});
63+
}
64+
5265
unique_ptr<PlanStageStats> makeStats(const char* name,
5366
StageType type,
54-
unique_ptr<SpecificStats> specific) {
67+
unique_ptr<SpecificStats> specific,
68+
size_t works = 1,
69+
size_t advances = 1) {
5570
auto stats = make_unique<PlanStageStats>(name, type);
56-
stats->common.works = 1;
71+
stats->common.works = works;
72+
stats->common.advanced = advances;
5773
stats->specific = std::move(specific);
5874
return stats;
5975
}
@@ -74,11 +90,59 @@ TEST(PlanRankerTest, NoFetchBonus) {
7490
badPlan->children[0]->children.emplace_back(
7591
makeStats("IXSCAN", STAGE_IXSCAN, make_unique<IndexScanStats>()));
7692

93+
auto cq = makeCanonicalQuery();
7794
auto scorer = plan_ranker::makePlanScorer();
78-
auto goodScore = scorer->calculateScore(goodPlan.get());
79-
auto badScore = scorer->calculateScore(badPlan.get());
95+
auto goodScore = scorer->calculateScore(goodPlan.get(), *cq);
96+
auto badScore = scorer->calculateScore(badPlan.get(), *cq);
8097

8198
ASSERT_GT(goodScore, badScore);
8299
}
83100

101+
TEST(PlanRankerTest, DistinctBonus) {
102+
RAIIServerParameterControllerForTest shardFilteringDistinct(
103+
"featureFlagShardFilteringDistinctScan", true);
104+
105+
// Two plans: both fetch, one is a DISTINCT_SCAN, other is an IXSCAN.
106+
// DISTINCT_SCAN does 2 advances / 10 works.
107+
auto dsStats = make_unique<DistinctScanStats>();
108+
dsStats->isFetching = true;
109+
dsStats->isShardFilteringDistinctScanEnabled = true;
110+
auto distinctScanPlan =
111+
makeStats("DISTINCT_SCAN", STAGE_DISTINCT_SCAN, std::move(dsStats), 10, 2);
112+
113+
// IXSCAN plan does 2 advances / 10 works.
114+
auto ixscanPlan = makeStats("FETCH", STAGE_FETCH, make_unique<FetchStats>(), 10, 2);
115+
ixscanPlan->children.emplace_back(
116+
makeStats("IXSCAN", STAGE_IXSCAN, make_unique<IndexScanStats>(), 10, 2));
117+
118+
auto cq = makeCanonicalQuery();
119+
cq->setDistinct(CanonicalDistinct("someKey"));
120+
auto scorer = plan_ranker::makePlanScorer();
121+
auto distinctScore = scorer->calculateScore(distinctScanPlan.get(), *cq);
122+
auto ixscanScore = scorer->calculateScore(ixscanPlan.get(), *cq);
123+
124+
// Both plans should tie for now; a tie-breaker will be applied at a later stage.
125+
ASSERT_EQ(distinctScore, ixscanScore);
126+
127+
// Now we change to an aggregation context (simulate $groupByDistinct rewrite case).
128+
auto groupBson = BSON("$group" << BSON("_id"
129+
<< "someKey"));
130+
cq->setCqPipeline(
131+
{DocumentSourceGroup::createFromBson(groupBson.firstElement(), cq->getExpCtx())}, true);
132+
133+
// In this aggregation context, a DISTINCT_SCAN is scored as more productive, even if both
134+
// plans have the same advances:work ratio. A DISTINCT_SCAN should now win by a large margin
135+
// (tie breaker).
136+
distinctScore = scorer->calculateScore(distinctScanPlan.get(), *cq);
137+
ixscanScore = scorer->calculateScore(ixscanPlan.get(), *cq);
138+
ASSERT_GT(distinctScore, ixscanScore);
139+
140+
// If we make the IXSCAN productive enough, however, it can still win!
141+
ixscanPlan->children[0]->common.advanced = 10;
142+
ixscanPlan->common.advanced = 10;
143+
distinctScore = scorer->calculateScore(distinctScanPlan.get(), *cq);
144+
ixscanScore = scorer->calculateScore(ixscanPlan.get(), *cq);
145+
ASSERT_GT(ixscanScore, distinctScore);
146+
}
147+
84148
}; // namespace

src/mongo/db/query/plan_ranker_util.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ void addTieBreakingHeuristicsBonuses(
208208
*/
209209
template <typename PlanStageType, typename ResultType, typename Data>
210210
StatusWith<std::unique_ptr<PlanRankingDecision>> pickBestPlan(
211-
const std::vector<BaseCandidatePlan<PlanStageType, ResultType, Data>>& candidates) {
211+
const std::vector<BaseCandidatePlan<PlanStageType, ResultType, Data>>& candidates,
212+
const CanonicalQuery& cq) {
212213
invariant(!candidates.empty());
213214
// A plan that hits EOF is automatically scored above
214215
// its peers. If multiple plans hit EOF during the same
@@ -243,7 +244,7 @@ StatusWith<std::unique_ptr<PlanRankingDecision>> pickBestPlan(
243244
[&]() { return explainer->getPlanSummary(); },
244245
i,
245246
statTrees[i]->common.isEOF);
246-
double score = makePlanScorer()->calculateScore(statTrees[i].get());
247+
double score = makePlanScorer()->calculateScore(statTrees[i].get(), cq);
247248
log_detail::logScore(score);
248249
if (statTrees[i]->common.isEOF) {
249250
log_detail::logEOFBonus(eofBonus);

0 commit comments

Comments
 (0)