Min score for time series (#96878)

tmgordeeva · web-flow · commit 59c6621d246a · 2023-06-20T11:33:33.000-07:00
* Min score for time series

Enables `min_score` filtering for the time series aggregation.
diff --git a/docs/changelog/96878.yaml b/docs/changelog/96878.yaml
@@ -0,0 +1,5 @@
+pr: 96878
+summary: Min score for time series
+area: TSDB
+type: bug
+issues: []
diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/time_series.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/time_series.yml
@@ -148,6 +148,60 @@ setup:
   - match: { aggregations.ts.buckets.1.key: { "key": "baz" } }
   - match: { aggregations.ts.buckets.2.key: { "key": "foo" } }
 
+---
+"Score test filter some":
+  - skip:
+      version: " - 8.8.99"
+      reason: Time series min score fixed in 8.9
+
+  - do:
+      search:
+        index: tsdb
+        body:
+          query:
+            function_score:
+              field_value_factor:
+                field: "val"
+                factor: 0.1
+                missing: 1
+          size: 0
+          min_score: 0.3
+          aggs:
+            ts:
+              time_series:
+                keyed: false
+
+  - match: { hits.total.value: 6 }
+  - length: { aggregations: 1 }
+
+  - length: { aggregations.ts.buckets: 3 }
+
+---
+"Score test filter all":
+  - skip:
+      version: " - 8.8.99"
+      reason: Time series min score fixed in 8.9
+
+  - do:
+      search:
+        index: tsdb
+        body:
+          query:
+            range:
+              "@timestamp":
+                gte: "2021-01-01T00:10:00Z"
+          size: 0
+          min_score: 100
+          aggs:
+            ts:
+              time_series:
+                keyed: false
+
+  - match: { hits.total.value: 0 }
+  - length: { aggregations: 1 }
+  - length: { aggregations.ts.buckets: 0 }
+
+
 ---
 "Sampler aggregation with nested time series aggregation failure":
   - skip:
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java
@@ -44,6 +44,7 @@ public static void preProcess(SearchContext context) {
         if (context.aggregations().factories().context() != null
             && context.aggregations().factories().context().isInSortOrderExecutionRequired()) {
             TimeSeriesIndexSearcher searcher = new TimeSeriesIndexSearcher(context.searcher(), getCancellationChecks(context));
+            searcher.setMinimumScore(context.minimumScore());
             searcher.setProfiler(context);
             try {
                 searcher.search(context.rewrittenQuery(), bucketCollector);
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/TimeSeriesIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/TimeSeriesIndexSearcher.java
@@ -22,6 +22,7 @@
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.PriorityQueue;
 import org.elasticsearch.cluster.metadata.DataStream;
+import org.elasticsearch.common.lucene.search.function.MinScoreScorer;
 import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
 import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.search.aggregations.AggregationExecutionContext;
@@ -53,6 +54,8 @@ public class TimeSeriesIndexSearcher {
     private final boolean tsidReverse;
     private final boolean timestampReverse;
 
+    private Float minimumScore = null;
+
     public TimeSeriesIndexSearcher(IndexSearcher searcher, List<Runnable> cancellations) {
         try {
             this.searcher = new ContextIndexSearcher(
@@ -76,6 +79,10 @@ public TimeSeriesIndexSearcher(IndexSearcher searcher, List<Runnable> cancellati
         this.timestampReverse = TIME_SERIES_SORT[1].getOrder() == SortOrder.DESC;
     }
 
+    public void setMinimumScore(Float minimumScore) {
+        this.minimumScore = minimumScore; // if non-null, search() wraps each leaf scorer in a MinScoreScorer using this value
+    }
+
     public void search(Query query, BucketCollector bucketCollector) throws IOException {
         int seen = 0;
         query = searcher.rewrite(query);
@@ -90,6 +97,9 @@ public void search(Query query, BucketCollector bucketCollector) throws IOExcept
             }
             Scorer scorer = weight.scorer(leaf);
             if (scorer != null) {
+                if (minimumScore != null) {
+                    scorer = new MinScoreScorer(weight, scorer, minimumScore);
+                }
                 LeafWalker leafWalker = new LeafWalker(leaf, scorer, bucketCollector, () -> tsidOrd[0]);
                 if (leafWalker.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                     leafWalkers.add(leafWalker);
@@ -197,6 +207,9 @@ private static class LeafWalker {
         private final SortedDocValues tsids;
         private final SortedNumericDocValues timestamps;    // TODO can we have this just a NumericDocValues?
         private final BytesRefBuilder scratch = new BytesRefBuilder();
+
+        private final Scorer scorer;
+
         int docId = -1;
         int tsidOrd;
         long timestamp;
@@ -207,6 +220,7 @@ private static class LeafWalker {
             this.collector = bucketCollector.getLeafCollector(aggCtx);
             liveDocs = context.reader().getLiveDocs();
             this.collector.setScorer(scorer);
+            this.scorer = scorer;
             iterator = scorer.iterator();
             tsids = DocValues.getSorted(context.reader(), TimeSeriesIdFieldMapper.NAME);
             timestamps = DocValues.getSortedNumeric(context.reader(), DataStream.TimestampField.FIXED_TIMESTAMP_FIELD);
diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/TimeSeriesIndexSearcherTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/TimeSeriesIndexSearcherTests.java
@@ -18,6 +18,7 @@
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.ScoreMode;
@@ -95,6 +96,51 @@ public void testCollectInOrderAcrossSegments() throws IOException, InterruptedEx
         dir.close();
     }
 
+    /**
+     * Verifies that a minimum score set on the searcher is honoured during collection: with a threshold
+     * of 2, a query boosted to 2 must collect every document and a query boosted to 1 must collect none.
+     */
+    public void testCollectMinScoreAcrossSegments() throws IOException, InterruptedException {
+        Directory dir = newDirectory();
+        RandomIndexWriter iw = getIndexWriter(dir);
+
+        AtomicInteger clock = new AtomicInteger(0);
+
+        final int DOC_COUNTS = 5;
+        Document doc = new Document();
+        for (int j = 0; j < DOC_COUNTS; j++) {
+            String tsid = "tsid" + j % 30;
+            long time = clock.addAndGet(j % 10);
+            doc.clear();
+            doc.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, new BytesRef(tsid)));
+            doc.add(new NumericDocValuesField(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD, time));
+            iw.addDocument(doc); // the method already declares IOException, so no wrapping is needed
+        }
+        iw.close();
+
+        IndexReader reader = DirectoryReader.open(dir);
+        IndexSearcher searcher = new IndexSearcher(reader);
+
+        TimeSeriesIndexSearcher indexSearcher = new TimeSeriesIndexSearcher(searcher, List.of());
+        indexSearcher.setMinimumScore(2f);
+
+        {
+            var collector = new TimeSeriesCancellationTests.CountingBucketCollector();
+            var query = new BoostQuery(new MatchAllDocsQuery(), 2f);
+            indexSearcher.search(query, collector);
+            assertEquals(DOC_COUNTS, collector.count.get()); // expected value first, so failure messages read correctly
+        }
+        {
+            var collector = new TimeSeriesCancellationTests.CountingBucketCollector();
+            var query = new BoostQuery(new MatchAllDocsQuery(), 1f);
+            indexSearcher.search(query, collector);
+            assertEquals(0, collector.count.get()); // expected value first, so failure messages read correctly
+        }
+
+        reader.close();
+        dir.close();
+    }
+
     /**
      * this test fixed the wrong init value of tsidOrd
      * See https://github.com/elastic/elasticsearch/issues/85711