Skip to content

Commit e359f50

Browse files
authored
Reusing Batch Iterators (#6403)
* Reusing Batch Iterators

Signed-off-by: alanprot <[email protected]>

* addressing some comments

Signed-off-by: alanprot <[email protected]>

* lint

Signed-off-by: alanprot <[email protected]>

* fixing reset method

Signed-off-by: alanprot <[email protected]>

---------

Signed-off-by: alanprot <[email protected]>
1 parent 3269f9d commit e359f50

File tree

9 files changed

+121
-62
lines changed

9 files changed

+121
-62
lines changed

pkg/ingester/ingester_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ func seriesSetFromResponseStream(s *mockQueryStreamServer) (storage.SeriesSet, e
110110

111111
serieses = append(serieses, &storage.SeriesEntry{
112112
Lset: ls,
113-
SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator {
114-
return batch.NewChunkMergeIterator(chunks, math.MinInt64, math.MaxInt64)
113+
SampleIteratorFn: func(it chunkenc.Iterator) chunkenc.Iterator {
114+
return batch.NewChunkMergeIterator(it, chunks, math.MinInt64, math.MaxInt64)
115115
},
116116
})
117117
}

pkg/querier/batch/batch.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,26 @@ type iterator interface {
5252
}
5353

5454
// NewChunkMergeIterator returns a chunkenc.Iterator that merges Cortex chunks together.
55-
func NewChunkMergeIterator(chunks []chunk.Chunk, _, _ model.Time) chunkenc.Iterator {
55+
func NewChunkMergeIterator(it chunkenc.Iterator, chunks []chunk.Chunk, _, _ model.Time) chunkenc.Iterator {
5656
converted := make([]GenericChunk, len(chunks))
5757
for i, c := range chunks {
5858
c := c
5959
converted[i] = NewGenericChunk(int64(c.From), int64(c.Through), c.NewIterator)
6060
}
6161

62-
return NewGenericChunkMergeIterator(converted)
62+
return NewGenericChunkMergeIterator(it, converted)
6363
}
6464

6565
// NewGenericChunkMergeIterator returns a chunkenc.Iterator that merges generic chunks together.
66-
func NewGenericChunkMergeIterator(chunks []GenericChunk) chunkenc.Iterator {
67-
iter := newMergeIterator(chunks)
66+
func NewGenericChunkMergeIterator(it chunkenc.Iterator, chunks []GenericChunk) chunkenc.Iterator {
67+
68+
var underlying iterator
69+
70+
if ia, ok := it.(*iteratorAdapter); ok {
71+
underlying = ia.underlying
72+
}
73+
74+
iter := newMergeIterator(underlying, chunks)
6875
return newIteratorAdapter(iter)
6976
}
7077

pkg/querier/batch/batch_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@ func BenchmarkNewChunkMergeIterator_CreateAndIterate(b *testing.B) {
5555
b.Run(name, func(b *testing.B) {
5656
b.ReportAllocs()
5757

58+
var it chunkenc.Iterator
5859
for n := 0; n < b.N; n++ {
59-
it := NewChunkMergeIterator(chunks, 0, 0)
60+
it = NewChunkMergeIterator(it, chunks, 0, 0)
6061
for it.Next() != chunkenc.ValNone {
6162
it.At()
6263
}
@@ -108,9 +109,9 @@ func BenchmarkNewChunkMergeIterator_Seek(b *testing.B) {
108109
b.ResetTimer()
109110
b.Run(name, func(b *testing.B) {
110111
b.ReportAllocs()
111-
112+
var it chunkenc.Iterator
112113
for n := 0; n < b.N; n++ {
113-
it := NewChunkMergeIterator(chunks, 0, 0)
114+
it = NewChunkMergeIterator(it, chunks, 0, 0)
114115
i := int64(0)
115116
for it.Seek(i*scenario.seekStep.Milliseconds()) != chunkenc.ValNone {
116117
i++
@@ -132,7 +133,7 @@ func TestSeekCorrectlyDealWithSinglePointChunks(t *testing.T) {
132133
chunkTwo := util.GenerateChunk(t, step, model.Time(10*step/time.Millisecond), 1, enc)
133134
chunks := []chunk.Chunk{chunkOne, chunkTwo}
134135

135-
sut := NewChunkMergeIterator(chunks, 0, 0)
136+
sut := NewChunkMergeIterator(nil, chunks, 0, 0)
136137

137138
// Following calls mimics Prometheus's query engine behaviour for VectorSelector.
138139
require.Equal(t, valType, sut.Next())

pkg/querier/batch/merge.go

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,27 @@ type mergeIterator struct {
2323
currErr error
2424
}
2525

26-
func newMergeIterator(cs []GenericChunk) *mergeIterator {
26+
func newMergeIterator(it iterator, cs []GenericChunk) *mergeIterator {
2727
css := partitionChunks(cs)
28-
its := make([]*nonOverlappingIterator, 0, len(css))
29-
for _, cs := range css {
30-
its = append(its, newNonOverlappingIterator(cs))
28+
29+
var c *mergeIterator
30+
31+
if mIterator, ok := it.(*mergeIterator); ok && cap(mIterator.its) >= len(css) {
32+
c = mIterator.Reset(len(css))
33+
} else {
34+
c = &mergeIterator{
35+
h: make(iteratorHeap, 0, len(css)),
36+
batches: make(batchStream, 0, len(css)),
37+
batchesBuf: make(batchStream, len(css)),
38+
}
3139
}
3240

33-
c := &mergeIterator{
34-
its: its,
35-
h: make(iteratorHeap, 0, len(its)),
36-
batches: make(batchStream, 0, len(its)),
37-
batchesBuf: make(batchStream, len(its)),
41+
if cap(c.its) < len(css) {
42+
c.its = make([]*nonOverlappingIterator, 0, len(css))
43+
}
44+
45+
for _, cs := range css {
46+
c.its = append(c.its, newNonOverlappingIterator(cs))
3847
}
3948

4049
for _, iter := range c.its {
@@ -52,6 +61,29 @@ func newMergeIterator(cs []GenericChunk) *mergeIterator {
5261
return c
5362
}
5463

64+
func (c *mergeIterator) Reset(size int) *mergeIterator {
65+
c.its = c.its[:0]
66+
c.h = c.h[:0]
67+
c.batches = c.batches[:0]
68+
69+
if size > cap(c.batchesBuf) {
70+
c.batchesBuf = make(batchStream, len(c.its))
71+
} else {
72+
c.batchesBuf = c.batchesBuf[:size]
73+
for i := 0; i < size; i++ {
74+
c.batchesBuf[i] = promchunk.Batch{}
75+
}
76+
}
77+
78+
for i := 0; i < len(c.nextBatchBuf); i++ {
79+
c.nextBatchBuf[i] = promchunk.Batch{}
80+
}
81+
82+
c.currErr = nil
83+
84+
return c
85+
}
86+
5587
func (c *mergeIterator) Seek(t int64, size int) chunkenc.ValueType {
5688

5789
// Optimisation to see if the seek is within our current caches batches.

pkg/querier/batch/merge_test.go

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package batch
22

33
import (
4+
"fmt"
45
"testing"
56
"time"
67

78
"github.com/prometheus/common/model"
9+
"github.com/prometheus/prometheus/tsdb/chunkenc"
810

911
"github.com/cortexproject/cortex/pkg/chunk/encoding"
1012
)
@@ -18,14 +20,40 @@ func TestMergeIter(t *testing.T) {
1820
chunk4 := mkGenericChunk(t, model.TimeFromUnix(75), 100, enc)
1921
chunk5 := mkGenericChunk(t, model.TimeFromUnix(100), 100, enc)
2022

21-
iter := newMergeIterator([]GenericChunk{chunk1, chunk2, chunk3, chunk4, chunk5})
23+
iter := newMergeIterator(nil, []GenericChunk{chunk1, chunk2, chunk3, chunk4, chunk5})
2224
testIter(t, 200, newIteratorAdapter(iter), enc)
2325

24-
iter = newMergeIterator([]GenericChunk{chunk1, chunk2, chunk3, chunk4, chunk5})
26+
iter = newMergeIterator(iter, []GenericChunk{chunk1, chunk2, chunk3, chunk4, chunk5})
2527
testSeek(t, 200, newIteratorAdapter(iter), enc)
2628
})
2729
}
2830

31+
func BenchmarkMergeIterator(b *testing.B) {
32+
chunks := make([]GenericChunk, 0, 10)
33+
for i := 0; i < 10; i++ {
34+
chunks = append(chunks, mkGenericChunk(b, model.Time(i*25), 120, encoding.PrometheusXorChunk))
35+
}
36+
iter := newMergeIterator(nil, chunks)
37+
38+
for _, r := range []bool{true, false} {
39+
b.Run(fmt.Sprintf("reuse-%t", r), func(b *testing.B) {
40+
b.ResetTimer()
41+
b.ReportAllocs()
42+
for i := 0; i < b.N; i++ {
43+
if r {
44+
iter = newMergeIterator(iter, chunks)
45+
} else {
46+
iter = newMergeIterator(nil, chunks)
47+
}
48+
a := newIteratorAdapter(iter)
49+
for a.Next() != chunkenc.ValNone {
50+
51+
}
52+
}
53+
})
54+
}
55+
}
56+
2957
func TestMergeHarder(t *testing.T) {
3058
t.Parallel()
3159
forEncodings(t, func(t *testing.T, enc encoding.Encoding) {
@@ -40,10 +68,10 @@ func TestMergeHarder(t *testing.T) {
4068
chunks = append(chunks, mkGenericChunk(t, from, samples, enc))
4169
from = from.Add(time.Duration(offset) * time.Second)
4270
}
43-
iter := newMergeIterator(chunks)
71+
iter := newMergeIterator(nil, chunks)
4472
testIter(t, offset*numChunks+samples-offset, newIteratorAdapter(iter), enc)
4573

46-
iter = newMergeIterator(chunks)
74+
iter = newMergeIterator(iter, chunks)
4775
testSeek(t, offset*numChunks+samples-offset, newIteratorAdapter(iter), enc)
4876
})
4977
}

pkg/querier/chunk_store_queryable.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ import (
77
"github.com/cortexproject/cortex/pkg/chunk"
88
)
99

10-
type chunkIteratorFunc func(chunks []chunk.Chunk, from, through model.Time) chunkenc.Iterator
10+
type chunkIteratorFunc func(it chunkenc.Iterator, chunks []chunk.Chunk, from, through model.Time) chunkenc.Iterator

pkg/querier/distributor_queryable.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ func (q *distributorQuerier) streamingSelect(ctx context.Context, sortSeries boo
155155

156156
serieses = append(serieses, &storage.SeriesEntry{
157157
Lset: ls,
158-
SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator {
159-
return q.chunkIterFn(chunks, model.Time(minT), model.Time(maxT))
158+
SampleIteratorFn: func(it chunkenc.Iterator) chunkenc.Iterator {
159+
return q.chunkIterFn(it, chunks, model.Time(minT), model.Time(maxT))
160160
},
161161
})
162162
}

pkg/querier/querier.go

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,14 @@ import (
1818
"github.com/prometheus/prometheus/promql"
1919
"github.com/prometheus/prometheus/promql/parser"
2020
"github.com/prometheus/prometheus/storage"
21-
"github.com/prometheus/prometheus/tsdb/chunkenc"
2221
"github.com/prometheus/prometheus/util/annotations"
2322
"github.com/thanos-io/promql-engine/engine"
2423
"github.com/thanos-io/promql-engine/logicalplan"
2524
"github.com/thanos-io/thanos/pkg/strutil"
2625
"golang.org/x/sync/errgroup"
2726

28-
"github.com/cortexproject/cortex/pkg/chunk"
29-
"github.com/cortexproject/cortex/pkg/ingester/client"
3027
"github.com/cortexproject/cortex/pkg/querier/batch"
3128
"github.com/cortexproject/cortex/pkg/querier/lazyquery"
32-
seriesset "github.com/cortexproject/cortex/pkg/querier/series"
3329
querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
3430
"github.com/cortexproject/cortex/pkg/tenant"
3531
"github.com/cortexproject/cortex/pkg/util"
@@ -188,7 +184,7 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor
188184
QueryStoreAfter: cfg.QueryStoreAfter,
189185
}
190186
}
191-
queryable := NewQueryable(distributorQueryable, ns, iteratorFunc, cfg, limits)
187+
queryable := NewQueryable(distributorQueryable, ns, cfg, limits)
192188
exemplarQueryable := newDistributorExemplarQueryable(distributor)
193189

194190
lazyQueryable := storage.QueryableFunc(func(mint int64, maxt int64) (storage.Querier, error) {
@@ -275,13 +271,12 @@ type limiterHolder struct {
275271
}
276272

277273
// NewQueryable creates a new Queryable for cortex.
278-
func NewQueryable(distributor QueryableWithFilter, stores []QueryableWithFilter, chunkIterFn chunkIteratorFunc, cfg Config, limits *validation.Overrides) storage.Queryable {
274+
func NewQueryable(distributor QueryableWithFilter, stores []QueryableWithFilter, cfg Config, limits *validation.Overrides) storage.Queryable {
279275
return storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) {
280276
q := querier{
281277
now: time.Now(),
282278
mint: mint,
283279
maxt: maxt,
284-
chunkIterFn: chunkIterFn,
285280
limits: limits,
286281
maxQueryIntoFuture: cfg.MaxQueryIntoFuture,
287282
ignoreMaxQueryLength: cfg.IgnoreMaxQueryLength,
@@ -295,10 +290,8 @@ func NewQueryable(distributor QueryableWithFilter, stores []QueryableWithFilter,
295290
}
296291

297292
type querier struct {
298-
chunkIterFn chunkIteratorFunc
299-
now time.Time
300-
mint, maxt int64
301-
293+
now time.Time
294+
mint, maxt int64
302295
limits *validation.Overrides
303296
maxQueryIntoFuture time.Duration
304297
distributor QueryableWithFilter
@@ -670,24 +663,3 @@ func validateQueryTimeRange(ctx context.Context, userID string, startMs, endMs i
670663

671664
return int64(startTime), int64(endTime), nil
672665
}
673-
674-
// Series in the returned set are sorted alphabetically by labels.
675-
func partitionChunks(chunks []chunk.Chunk, mint, maxt int64, iteratorFunc chunkIteratorFunc) storage.SeriesSet {
676-
chunksBySeries := map[string][]chunk.Chunk{}
677-
for _, c := range chunks {
678-
key := client.LabelsToKeyString(c.Metric)
679-
chunksBySeries[key] = append(chunksBySeries[key], c)
680-
}
681-
682-
series := make([]storage.Series, 0, len(chunksBySeries))
683-
for i := range chunksBySeries {
684-
series = append(series, &storage.SeriesEntry{
685-
Lset: chunksBySeries[i][0].Metric,
686-
SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator {
687-
return iteratorFunc(chunksBySeries[i], model.Time(mint), model.Time(maxt))
688-
},
689-
})
690-
}
691-
692-
return seriesset.NewConcreteSeriesSet(true, series)
693-
}

pkg/querier/querier_test.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/prometheus/prometheus/promql"
1919
"github.com/prometheus/prometheus/scrape"
2020
"github.com/prometheus/prometheus/storage"
21+
"github.com/prometheus/prometheus/tsdb/chunkenc"
2122
"github.com/prometheus/prometheus/tsdb/tsdbutil"
2223
"github.com/prometheus/prometheus/util/annotations"
2324
"github.com/stretchr/testify/assert"
@@ -32,6 +33,7 @@ import (
3233
"github.com/cortexproject/cortex/pkg/cortexpb"
3334
"github.com/cortexproject/cortex/pkg/ingester/client"
3435
"github.com/cortexproject/cortex/pkg/querier/batch"
36+
"github.com/cortexproject/cortex/pkg/querier/series"
3537
"github.com/cortexproject/cortex/pkg/util"
3638
"github.com/cortexproject/cortex/pkg/util/chunkcompat"
3739
"github.com/cortexproject/cortex/pkg/util/flagext"
@@ -333,7 +335,7 @@ func TestShouldSortSeriesIfQueryingMultipleQueryables(t *testing.T) {
333335
for _, queryable := range tc.storeQueriables {
334336
wQueriables = append(wQueriables, &wrappedSampleAndChunkQueryable{QueryableWithFilter: queryable})
335337
}
336-
queryable := NewQueryable(wDistributorQueriable, wQueriables, batch.NewChunkMergeIterator, cfg, overrides)
338+
queryable := NewQueryable(wDistributorQueriable, wQueriables, cfg, overrides)
337339
opts := promql.EngineOpts{
338340
Logger: log.NewNopLogger(),
339341
MaxSamples: 1e6,
@@ -521,7 +523,7 @@ func TestLimits(t *testing.T) {
521523
overrides, err := validation.NewOverrides(DefaultLimitsConfig(), tc.tenantLimit)
522524
require.NoError(t, err)
523525

524-
queryable := NewQueryable(wDistributorQueriable, wQueriables, batch.NewChunkMergeIterator, cfg, overrides)
526+
queryable := NewQueryable(wDistributorQueriable, wQueriables, cfg, overrides)
525527
opts := promql.EngineOpts{
526528
Logger: log.NewNopLogger(),
527529
MaxSamples: 1e6,
@@ -1476,7 +1478,7 @@ type mockStoreQuerier struct {
14761478

14771479
// Select implements storage.Querier interface.
14781480
// The bool passed is ignored because the series is always sorted.
1479-
func (q *mockStoreQuerier) Select(ctx context.Context, _ bool, sp *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet {
1481+
func (q *mockStoreQuerier) Select(_ context.Context, _ bool, sp *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet {
14801482
// If we don't skip here, it'll make /series lookups extremely slow as all the chunks will be loaded.
14811483
// That flag is only to be set with blocks storage engine, and this is a protective measure.
14821484
if sp != nil && sp.Func == "series" {
@@ -1488,7 +1490,24 @@ func (q *mockStoreQuerier) Select(ctx context.Context, _ bool, sp *storage.Selec
14881490
return storage.ErrSeriesSet(err)
14891491
}
14901492

1491-
return partitionChunks(chunks, q.mint, q.maxt, q.chunkIteratorFunc)
1493+
cs := make([]storage.Series, 0, len(chunks))
1494+
chunksBySeries := map[string][]chunk.Chunk{}
1495+
1496+
for _, c := range chunks {
1497+
key := client.LabelsToKeyString(c.Metric)
1498+
chunksBySeries[key] = append(chunksBySeries[key], c)
1499+
}
1500+
1501+
for i, c := range chunksBySeries {
1502+
cs = append(cs, &storage.SeriesEntry{
1503+
Lset: chunksBySeries[i][0].Metric,
1504+
SampleIteratorFn: func(it chunkenc.Iterator) chunkenc.Iterator {
1505+
return q.chunkIteratorFunc(it, c, model.Time(mint), model.Time(maxt))
1506+
},
1507+
})
1508+
}
1509+
1510+
return series.NewConcreteSeriesSet(true, cs)
14921511
}
14931512

14941513
func (q *mockStoreQuerier) LabelValues(ctx context.Context, name string, _ *storage.LabelHints, labels ...*labels.Matcher) ([]string, annotations.Annotations, error) {

0 commit comments

Comments
 (0)