@@ -31,10 +31,15 @@
 
 package org.opensearch.search.aggregations.bucket.histogram;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesSkipper;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.DocIdStream;
+import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.util.CollectionUtil;
 import org.opensearch.common.Nullable;
@@ -88,6 +93,8 @@
  * @opensearch.internal
  */
 class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator, StarTreePreComputeCollector {
+    private static final Logger logger = LogManager.getLogger(DateHistogramAggregator.class);
+
     private final ValuesSource.Numeric valuesSource;
     private final DocValueFormat formatter;
     private final Rounding rounding;
@@ -105,7 +112,8 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg
     private boolean starTreeDateRoundingRequired = true;
 
     private final FilterRewriteOptimizationContext filterRewriteOptimizationContext;
-    public final String fieldName;
+    private final String fieldName;
+    private final boolean fieldIndexSort;
 
     DateHistogramAggregator(
         String name,
@@ -173,6 +181,7 @@ protected Function<Long, Long> bucketOrdProducer() {
         this.fieldName = (valuesSource instanceof ValuesSource.Numeric.FieldData)
             ? ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName()
            : null;
+        this.fieldIndexSort = this.fieldName == null ? false : context.getQueryShardContext().indexSortedOnField(fieldName);
         this.starTreeDateDimension = (context.getQueryShardContext().getStarTreeQueryContext() != null)
             ? fetchStarTreeCalendarUnit()
             : null;
@@ -209,9 +218,22 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol
             return LeafBucketCollector.NO_OP_COLLECTOR;
         }
 
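+        // The skipper is non-null only when the field was written with a doc-values skip index;
+        // it exposes min/max values and doc counts over nested ranges of doc IDs.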
+        DocValuesSkipper skipper = null;
+        if (this.fieldName != null) {
+            skipper = ctx.reader().getDocValuesSkipper(this.fieldName);
+        }
         final SortedNumericDocValues values = valuesSource.longValues(ctx);
         final NumericDocValues singleton = DocValues.unwrapSingleton(values);
 
+        // If the index is sorted on this field and there are no sub-aggregations, we can use the skip-list-based collector.
+        logger.trace("Index sorted on field: {}, skipper: {}", fieldIndexSort, skipper);
+        if (fieldIndexSort && skipper != null && singleton != null) {
+            // TODO: add hard bounds support
+            if (hardBounds == null && (sub == null || sub == LeafBucketCollector.NO_OP_COLLECTOR)) {
+                return new HistogramSkiplistLeafCollector(singleton, skipper, preparedRounding, bucketOrds, this::incrementBucketDocCount);
+            }
+        }
+
         if (singleton != null) {
             // Optimized path for single-valued fields
             return new LeafBucketCollectorBase(sub, values) {
@@ -397,4 +419,126 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) {
             return 1.0;
         }
     }
+
+    private static class HistogramSkiplistLeafCollector extends LeafBucketCollector {
+
+        private final NumericDocValues values;
+        private final DocValuesSkipper skipper;
+        private final Rounding.Prepared preparedRounding;
+        private final LongKeyedBucketOrds bucketOrds;
+        private final BiConsumer<Long, Long> incrementDocCount;
+
+        /**
+         * Max doc ID (inclusive) up to which all docs' values may map to the same bucket.
+         */
+        private int upToInclusive = -1;
+
+        /**
+         * Whether the values of all docs up to {@link #upToInclusive} map to the same bucket.
+         */
+        private boolean upToSameBucket;
+
+        /**
+         * Index in bucketOrds for docs up to {@link #upToInclusive}.
+         */
+        private long upToBucketIndex;
+
+        HistogramSkiplistLeafCollector(
+            NumericDocValues values,
+            DocValuesSkipper skipper,
+            Rounding.Prepared preparedRounding,
+            LongKeyedBucketOrds bucketOrds,
+            BiConsumer<Long, Long> incrementDocCount
+        ) {
+            this.values = values;
+            this.skipper = skipper;
+            this.preparedRounding = preparedRounding;
+            this.bucketOrds = bucketOrds;
+            this.incrementDocCount = incrementDocCount;
+        }
+
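+        // Scores are not needed: this collector only counts documents into buckets.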
+        @Override
+        public void setScorer(Scorable scorer) throws IOException {}
+
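+        /**
+         * Advance the skipper to the range containing {@code doc} and recompute whether all docs
+         * up to the new {@link #upToInclusive} boundary round into a single bucket.
+         */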
+        private void advanceSkipper(int doc) throws IOException {
+            if (doc > skipper.maxDocID(0)) {
+                skipper.advance(doc);
+            }
+            upToSameBucket = false;
+
+            if (skipper.minDocID(0) > doc) {
+                // Corner case which happens if `doc` doesn't have a value and is between two intervals of
+                // the doc-value skip index.
+                upToInclusive = skipper.minDocID(0) - 1;
+                return;
+            }
+
+            upToInclusive = skipper.maxDocID(0);
+
+            // Now find the highest level where all docs map to the same bucket.
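+            // Levels are nested: each level spans at least the doc range of the one below it,
+            // so stop at the first level whose docs no longer share a single bucket.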
+            for (int level = 0; level < skipper.numLevels(); ++level) {
+                int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
+                long minBucket = preparedRounding.round(skipper.minValue(level));
+                long maxBucket = preparedRounding.round(skipper.maxValue(level));
+
+                if (skipper.docCount(level) == totalDocsAtLevel && minBucket == maxBucket) {
+                    // All docs at this level have a value, and all values map to the same bucket.
+                    upToInclusive = skipper.maxDocID(level);
+                    upToSameBucket = true;
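+                    // bucketOrds.add returns the new ordinal, or -1 - ordinal if the bucket already exists.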
+                    upToBucketIndex = bucketOrds.add(0, maxBucket);
+                    if (upToBucketIndex < 0) {
+                        upToBucketIndex = -1 - upToBucketIndex;
+                    }
+                } else {
+                    break;
+                }
+            }
+        }
+
+        @Override
+        public void collect(int doc, long owningBucketOrd) throws IOException {
+            collect(doc);
+        }
+
+        @Override
+        public void collect(int doc) throws IOException {
+            if (doc > upToInclusive) {
+                advanceSkipper(doc);
+            }
+
+            if (upToSameBucket) {
+                incrementDocCount.accept(upToBucketIndex, 1L);
+            } else if (values.advanceExact(doc)) {
+                final long value = values.longValue();
+                long bucketIndex = bucketOrds.add(0, preparedRounding.round(value));
+                if (bucketIndex < 0) {
+                    bucketIndex = -1 - bucketIndex;
+                }
+                incrementDocCount.accept(bucketIndex, 1L);
+            }
+        }
+
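+        // Bulk path: consume the stream in windows bounded by upToInclusive, counting whole
+        // windows at once when all their docs are known to share a single bucket.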
+        @Override
+        public void collect(DocIdStream stream) throws IOException {
+            for (;;) {
+                int upToExclusive = upToInclusive + 1;
+                if (upToExclusive < 0) { // overflow
+                    upToExclusive = Integer.MAX_VALUE;
+                }
+
+                if (upToSameBucket) {
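+                    // count() consumes every doc in the stream below upToExclusive without reading values.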
+                    long count = stream.count(upToExclusive);
+                    incrementDocCount.accept(upToBucketIndex, count);
+                } else {
+                    stream.forEach(upToExclusive, this::collect);
+                }
+
+                if (stream.mayHaveRemaining()) {
+                    advanceSkipper(upToExclusive);
+                } else {
+                    break;
+                }
+            }
+        }
+    }
 }