@@ -160,19 +160,9 @@ std::vector<BSONObj> SamplingEstimatorImpl::getIndexKeys(const IndexBounds& boun
160
160
nullptr /* collator*/ ,
161
161
boost::none);
162
162
163
- // This function converts an index key to a BSONObj in order to compare with the IndexBounds.
164
- auto keyStringToBson = [](const key_string::Value& keyString) {
165
- BSONObjBuilder bob;
166
- auto keyStringObj = key_string::toBson (keyString, Ordering::make (BSONObj ()));
167
- for (auto && keyStringElem : keyStringObj) {
168
- bob.append (keyStringElem);
169
- }
170
- return bob.obj ();
171
- };
172
-
173
163
std::vector<BSONObj> indexKeys;
174
164
for (auto && keyString : keyStrings) {
175
- indexKeys.push_back (keyStringToBson (keyString));
165
+ indexKeys.push_back (key_string::toBson (keyString, Ordering::make ( BSONObj ()) ));
176
166
}
177
167
return indexKeys;
178
168
}
@@ -303,10 +293,12 @@ SamplingEstimatorImpl::generateChunkSamplingPlan(PlanYieldPolicy* sbeYieldPolicy
303
293
0 /* nodeId */ );
304
294
305
295
296
+ sbe::value::SlotVector outerProjectsSlots;
297
+ sbe::value::SlotVector outerCorrelatedSlots{*outerRid};
306
298
auto loopJoinStage = sbe::makeS<sbe::LoopJoinStage>(std::move (outerStage),
307
299
std::move (innerStage),
308
- sbe::value::SlotVector{} ,
309
- sbe::value::SlotVector{*outerRid} ,
300
+ outerProjectsSlots ,
301
+ outerCorrelatedSlots ,
310
302
nullptr /* predicate */ ,
311
303
0 /* _nodeId */ );
312
304
@@ -414,6 +406,38 @@ void SamplingEstimatorImpl::generateChunkSample() {
414
406
return ;
415
407
}
416
408
409
+ void SamplingEstimatorImpl::generateSampleBySeqScanningForTesting () {
410
+ // Create a CanonicalQuery for the sampling plan.
411
+ auto cq = makeCanonicalQuery (_collections.getMainCollection ()->ns (), _opCtx, _sampleSize);
412
+ auto sbeYieldPolicy = PlanYieldPolicySBE::make (
413
+ _opCtx, PlanYieldPolicy::YieldPolicy::YIELD_AUTO, _collections, cq->nss ());
414
+
415
+ auto staticData = std::make_unique<stage_builder::PlanStageStaticData>();
416
+ sbe::value::SlotIdGenerator ids;
417
+ staticData->resultSlot = ids.generate ();
418
+ const CollectionPtr& collection = _collections.getMainCollection ();
419
+ // Scan the first '_sampleSize' documents sequentially from the start of the target collection
420
+ // in order to generate a repeatable sample.
421
+ auto stage = makeScanStage (
422
+ collection, staticData->resultSlot , boost::none, boost::none, false , sbeYieldPolicy.get ());
423
+ stage = sbe::makeS<sbe::LimitSkipStage>(
424
+ std::move (stage),
425
+ sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::NumberInt64,
426
+ sbe::value::bitcastFrom<int64_t >(_sampleSize)),
427
+ nullptr /* skip */ ,
428
+ 0 /* nodeId */ );
429
+
430
+ stage_builder::PlanStageData data{
431
+ stage_builder::Environment{std::make_unique<sbe::RuntimeEnvironment>()},
432
+ std::move (staticData)};
433
+ auto plan =
434
+ std::make_pair<std::unique_ptr<sbe::PlanStage>, mongo::stage_builder::PlanStageData>(
435
+ std::move (stage), std::move (data));
436
+ executeSamplingQueryAndSample (plan, std::move (cq), std::move (sbeYieldPolicy));
437
+
438
+ return ;
439
+ }
440
+
417
441
CardinalityEstimate SamplingEstimatorImpl::estimateCardinality (const MatchExpression* expr) const {
418
442
size_t cnt = 0 ;
419
443
for (const auto & doc : _sample) {
@@ -527,6 +551,11 @@ SamplingEstimatorImpl::SamplingEstimatorImpl(OperationContext* opCtx,
527
551
_sampleSize(sampleSize),
528
552
_numChunks(numChunks),
529
553
_collectionCard(collectionCard) {
554
+ if (internalQuerySamplingBySequentialScan.load ()) {
555
+ // This is only used for testing purposes when a repeatable sample is needed.
556
+ generateSampleBySeqScanningForTesting ();
557
+ return ;
558
+ }
530
559
531
560
if (sampleSize >= collectionCard.cardinality ().v ()) {
532
561
// If the required sample is larger than the collection, the sample is generated from all
0 commit comments