9
9
package org .elasticsearch .search .aggregations .bucket .terms ;
10
10
11
11
import org .apache .lucene .index .DocValues ;
12
+ import org .apache .lucene .index .LeafReader ;
12
13
import org .apache .lucene .index .LeafReaderContext ;
13
14
import org .apache .lucene .index .SortedDocValues ;
14
15
import org .apache .lucene .index .SortedSetDocValues ;
15
16
import org .apache .lucene .util .ArrayUtil ;
17
+ import org .apache .lucene .util .Bits ;
16
18
import org .apache .lucene .util .BytesRef ;
17
19
import org .apache .lucene .util .PriorityQueue ;
18
20
import org .elasticsearch .common .io .stream .StreamOutput ;
21
+ import org .elasticsearch .common .util .BigArrays ;
19
22
import org .elasticsearch .common .util .LongArray ;
20
23
import org .elasticsearch .common .util .LongHash ;
24
+ import org .elasticsearch .core .Nullable ;
21
25
import org .elasticsearch .core .Releasable ;
22
26
import org .elasticsearch .core .Releasables ;
23
27
import org .elasticsearch .search .DocValueFormat ;
@@ -82,7 +86,8 @@ public GlobalOrdinalsStringTermsAggregator(
82
86
SubAggCollectionMode collectionMode ,
83
87
boolean showTermDocCountError ,
84
88
CardinalityUpperBound cardinality ,
85
- Map <String , Object > metadata
89
+ Map <String , Object > metadata ,
90
+ boolean excludeDeletedDocs
86
91
) throws IOException {
87
92
super (name , factories , context , parent , order , format , bucketCountThresholds , collectionMode , showTermDocCountError , metadata );
88
93
this .resultStrategy = resultStrategy .apply (this ); // ResultStrategy needs a reference to the Aggregator to do its job.
@@ -91,13 +96,13 @@ public GlobalOrdinalsStringTermsAggregator(
91
96
this .lookupGlobalOrd = values ::lookupOrd ;
92
97
this .acceptedGlobalOrdinals = acceptedOrds ;
93
98
if (remapGlobalOrds ) {
94
- this .collectionStrategy = new RemapGlobalOrds (cardinality );
99
+ this .collectionStrategy = new RemapGlobalOrds (cardinality , excludeDeletedDocs );
95
100
} else {
96
101
this .collectionStrategy = cardinality .map (estimate -> {
97
102
if (estimate > 1 ) {
98
103
throw new AggregationExecutionException ("Dense ords don't know how to collect from many buckets" );
99
104
}
100
- return new DenseGlobalOrds ();
105
+ return new DenseGlobalOrds (excludeDeletedDocs );
101
106
});
102
107
}
103
108
}
@@ -274,7 +279,8 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
274
279
boolean remapGlobalOrds ,
275
280
SubAggCollectionMode collectionMode ,
276
281
boolean showTermDocCountError ,
277
- Map <String , Object > metadata
282
+ Map <String , Object > metadata ,
283
+ boolean excludeDeletedDocs
278
284
) throws IOException {
279
285
super (
280
286
name ,
@@ -292,7 +298,8 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
292
298
collectionMode ,
293
299
showTermDocCountError ,
294
300
CardinalityUpperBound .ONE ,
295
- metadata
301
+ metadata ,
302
+ excludeDeletedDocs
296
303
);
297
304
assert factories == null || factories .countAggregators () == 0 ;
298
305
this .segmentDocCounts = context .bigArrays ().newLongArray (1 , true );
@@ -441,6 +448,13 @@ interface BucketInfoConsumer {
441
448
* bucket ordinal.
442
449
*/
443
450
class DenseGlobalOrds extends CollectionStrategy {
451
+
452
+ private final boolean excludeDeletedDocs ;
453
+
454
+ DenseGlobalOrds (boolean excludeDeletedDocs ) {
455
+ this .excludeDeletedDocs = excludeDeletedDocs ;
456
+ }
457
+
444
458
@ Override
445
459
String describe () {
446
460
return "dense" ;
@@ -471,6 +485,14 @@ long globalOrdToBucketOrd(long owningBucketOrd, long globalOrd) {
471
485
@ Override
472
486
void forEach (long owningBucketOrd , BucketInfoConsumer consumer ) throws IOException {
473
487
assert owningBucketOrd == 0 ;
488
+ if (excludeDeletedDocs ) {
489
+ forEachExcludeDeletedDocs (consumer );
490
+ } else {
491
+ forEachAllowDeletedDocs (consumer );
492
+ }
493
+ }
494
+
495
+ private void forEachAllowDeletedDocs (BucketInfoConsumer consumer ) throws IOException {
474
496
for (long globalOrd = 0 ; globalOrd < valueCount ; globalOrd ++) {
475
497
if (false == acceptedGlobalOrdinals .test (globalOrd )) {
476
498
continue ;
@@ -482,6 +504,39 @@ void forEach(long owningBucketOrd, BucketInfoConsumer consumer) throws IOExcepti
482
504
}
483
505
}
484
506
507
+ /**
508
+ * Excludes deleted docs in the results by cross-checking with liveDocs.
509
+ */
510
+ private void forEachExcludeDeletedDocs (BucketInfoConsumer consumer ) throws IOException {
511
+ try (LongHash accepted = new LongHash (20 , new BigArrays (null , null , "" ))) {
512
+ for (LeafReaderContext ctx : searcher ().getTopReaderContext ().leaves ()) {
513
+ LeafReader reader = ctx .reader ();
514
+ Bits liveDocs = reader .getLiveDocs ();
515
+ SortedSetDocValues globalOrds = null ;
516
+ for (int docId = 0 ; docId < reader .maxDoc (); ++docId ) {
517
+ if (liveDocs == null || liveDocs .get (docId )) { // document is not deleted
518
+ globalOrds = globalOrds == null ? valuesSource .globalOrdinalsValues (ctx ) : globalOrds ;
519
+ if (globalOrds .advanceExact (docId )) {
520
+ for (long globalOrd = globalOrds .nextOrd (); globalOrd != NO_MORE_ORDS ; globalOrd = globalOrds .nextOrd ()) {
521
+ if (accepted .find (globalOrd ) >= 0 ) {
522
+ continue ;
523
+ }
524
+ if (false == acceptedGlobalOrdinals .test (globalOrd )) {
525
+ continue ;
526
+ }
527
+ long docCount = bucketDocCount (globalOrd );
528
+ if (bucketCountThresholds .getMinDocCount () == 0 || docCount > 0 ) {
529
+ consumer .accept (globalOrd , globalOrd , docCount );
530
+ accepted .add (globalOrd );
531
+ }
532
+ }
533
+ }
534
+ }
535
+ }
536
+ }
537
+ }
538
+ }
539
+
485
540
@ Override
486
541
public void close () {}
487
542
}
@@ -494,9 +549,11 @@ public void close() {}
494
549
*/
495
550
private class RemapGlobalOrds extends CollectionStrategy {
496
551
private final LongKeyedBucketOrds bucketOrds ;
552
+ private final boolean excludeDeletedDocs ;
497
553
498
- private RemapGlobalOrds (CardinalityUpperBound cardinality ) {
554
+ private RemapGlobalOrds (CardinalityUpperBound cardinality , boolean excludeDeletedDocs ) {
499
555
bucketOrds = LongKeyedBucketOrds .buildForValueRange (bigArrays (), cardinality , 0 , valueCount - 1 );
556
+ this .excludeDeletedDocs = excludeDeletedDocs ;
500
557
}
501
558
502
559
@ Override
@@ -530,27 +587,20 @@ long globalOrdToBucketOrd(long owningBucketOrd, long globalOrd) {
530
587
531
588
@ Override
532
589
void forEach (long owningBucketOrd , BucketInfoConsumer consumer ) throws IOException {
590
+ if (excludeDeletedDocs ) {
591
+ forEachExcludeDeletedDocs (owningBucketOrd , consumer );
592
+ } else {
593
+ forEachAllowDeletedDocs (owningBucketOrd , consumer );
594
+ }
595
+ }
596
+
597
+ void forEachAllowDeletedDocs (long owningBucketOrd , BucketInfoConsumer consumer ) throws IOException {
533
598
if (bucketCountThresholds .getMinDocCount () == 0 ) {
534
599
for (long globalOrd = 0 ; globalOrd < valueCount ; globalOrd ++) {
535
600
if (false == acceptedGlobalOrdinals .test (globalOrd )) {
536
601
continue ;
537
602
}
538
- /*
539
- * Use `add` instead of `find` here to assign an ordinal
540
- * even if the global ord wasn't found so we can build
541
- * sub-aggregations without trouble even though we haven't
542
- * hit any documents for them. This is wasteful, but
543
- * settings minDocCount == 0 is wasteful in general.....
544
- */
545
- long bucketOrd = bucketOrds .add (owningBucketOrd , globalOrd );
546
- long docCount ;
547
- if (bucketOrd < 0 ) {
548
- bucketOrd = -1 - bucketOrd ;
549
- docCount = bucketDocCount (bucketOrd );
550
- } else {
551
- docCount = 0 ;
552
- }
553
- consumer .accept (globalOrd , bucketOrd , docCount );
603
+ addBucketForMinDocCountZero (owningBucketOrd , globalOrd , consumer , null );
554
604
}
555
605
} else {
556
606
LongKeyedBucketOrds .BucketOrdsEnum ordsEnum = bucketOrds .ordsEnum (owningBucketOrd );
@@ -563,6 +613,64 @@ void forEach(long owningBucketOrd, BucketInfoConsumer consumer) throws IOExcepti
563
613
}
564
614
}
565
615
616
+ /**
617
+ * Excludes deleted docs in the results by cross-checking with liveDocs.
618
+ */
619
+ void forEachExcludeDeletedDocs (long owningBucketOrd , BucketInfoConsumer consumer ) throws IOException {
620
+ assert bucketCountThresholds .getMinDocCount () == 0 ;
621
+ try (LongHash accepted = new LongHash (20 , new BigArrays (null , null , "" ))) {
622
+ for (LeafReaderContext ctx : searcher ().getTopReaderContext ().leaves ()) {
623
+ LeafReader reader = ctx .reader ();
624
+ Bits liveDocs = reader .getLiveDocs ();
625
+ SortedSetDocValues globalOrds = null ;
626
+ for (int docId = 0 ; docId < reader .maxDoc (); ++docId ) {
627
+ if (liveDocs == null || liveDocs .get (docId )) { // document is not deleted
628
+ globalOrds = globalOrds == null ? valuesSource .globalOrdinalsValues (ctx ) : globalOrds ;
629
+ if (globalOrds .advanceExact (docId )) {
630
+ for (long globalOrd = globalOrds .nextOrd (); globalOrd != NO_MORE_ORDS ; globalOrd = globalOrds .nextOrd ()) {
631
+ if (accepted .find (globalOrd ) >= 0 ) {
632
+ continue ;
633
+ }
634
+ if (false == acceptedGlobalOrdinals .test (globalOrd )) {
635
+ continue ;
636
+ }
637
+ addBucketForMinDocCountZero (owningBucketOrd , globalOrd , consumer , accepted );
638
+ }
639
+ }
640
+ }
641
+ }
642
+ }
643
+ }
644
+ }
645
+
646
+ private void addBucketForMinDocCountZero (
647
+ long owningBucketOrd ,
648
+ long globalOrd ,
649
+ BucketInfoConsumer consumer ,
650
+ @ Nullable LongHash accepted
651
+ ) throws IOException {
652
+ /*
653
+ * Use `add` instead of `find` here to assign an ordinal
654
+ * even if the global ord wasn't found so we can build
655
+ * sub-aggregations without trouble even though we haven't
656
+ * hit any documents for them. This is wasteful, but
657
+ * settings minDocCount == 0 is wasteful in general.....
658
+ */
659
+ long bucketOrd = bucketOrds .add (owningBucketOrd , globalOrd );
660
+ long docCount ;
661
+ if (bucketOrd < 0 ) {
662
+ bucketOrd = -1 - bucketOrd ;
663
+ docCount = bucketDocCount (bucketOrd );
664
+ } else {
665
+ docCount = 0 ;
666
+ }
667
+ assert globalOrd >= 0 ;
668
+ consumer .accept (globalOrd , bucketOrd , docCount );
669
+ if (accepted != null ) {
670
+ accepted .add (globalOrd );
671
+ }
672
+ }
673
+
566
674
@ Override
567
675
public void close () {
568
676
bucketOrds .close ();
0 commit comments