From b2538dc8273f15b085a8591661c9bd673fb3f424 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Wed, 22 Oct 2025 23:44:49 -0700 Subject: [PATCH 1/6] apply low selectivity matchers lazily in ingester Signed-off-by: yeya24 --- docs/configuration/config-file-reference.md | 6 + pkg/ingester/ingester.go | 18 +- pkg/ingester/matchers.go | 61 ++++ pkg/ingester/matchers_test.go | 312 ++++++++++++++++++++ schemas/cortex-config-schema.json | 6 + 5 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 pkg/ingester/matchers.go create mode 100644 pkg/ingester/matchers_test.go diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 3eb66084940..7c214a9d526 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3738,6 +3738,12 @@ instance_limits: # CLI flag: -ingester.skip-metadata-limits [skip_metadata_limits: | default = true] +# Enable optimization of label matchers when query chunks. When enabled, +# matchers with low selectivity such as =~.+ are applied lazily during series +# scanning instead of being used for postings matching. +# CLI flag: -ingester.enable-matcher-optimization +[enable_matcher_optimization: | default = false] + query_protection: rejection: threshold: diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 2330fdec595..897ab8acc2c 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -162,6 +162,10 @@ type Config struct { // If enabled, the metadata API returns all metadata regardless of the limits. SkipMetadataLimits bool `yaml:"skip_metadata_limits"` + // When enabled, matchers with low selectivity are applied lazily during series scanning + // instead of being used for postings selection. + EnableMatcherOptimization bool `yaml:"enable_matcher_optimization"` + QueryProtection configs.QueryProtection `yaml:"query_protection"` } @@ -185,6 +189,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&cfg.DisableChunkTrimming, "ingester.disable-chunk-trimming", false, "Disable trimming of matching series chunks based on query Start and End time. When disabled, the result may contain samples outside the queried time range but select performances may be improved. Note that certain query results might change by changing this option.") f.IntVar(&cfg.MatchersCacheMaxItems, "ingester.matchers-cache-max-items", 0, "Maximum number of entries in the regex matchers cache. 0 to disable.") f.BoolVar(&cfg.SkipMetadataLimits, "ingester.skip-metadata-limits", true, "If enabled, the metadata API returns all metadata regardless of the limits.") + f.BoolVar(&cfg.EnableMatcherOptimization, "ingester.enable-matcher-optimization", false, "Enable optimization of label matchers when query chunks. When enabled, matchers with low selectivity such as =~.+ are applied lazily during series scanning instead of being used for postings matching.") cfg.DefaultLimits.RegisterFlagsWithPrefix(f, "ingester.") cfg.QueryProtection.RegisterFlagsWithPrefix(f, "ingester.") @@ -2295,6 +2300,10 @@ func (i *Ingester) queryStreamChunks(ctx context.Context, db *userTSDB, from, th End: through, DisableTrimming: i.cfg.DisableChunkTrimming, } + var lazyMatchers []*labels.Matcher + if i.cfg.EnableMatcherOptimization { + matchers, lazyMatchers = optimizeMatchers(matchers) + } // It's not required to return sorted series because series are sorted by the Cortex querier. ss := q.Select(ctx, false, hints, matchers...) c() @@ -2308,14 +2317,19 @@ func (i *Ingester) queryStreamChunks(ctx context.Context, db *userTSDB, from, th var it chunks.Iterator for ss.Next() { series := ss.At() + lbls := series.Labels() + + if !labelsMatches(lbls, lazyMatchers) { + continue + } - if sm.IsSharded() && !sm.MatchesLabels(series.Labels()) { + if sm.IsSharded() && !sm.MatchesLabels(lbls) { continue } // convert labels to LabelAdapter ts := client.TimeSeriesChunk{ - Labels: cortexpb.FromLabelsToLabelAdapters(series.Labels()), + Labels: cortexpb.FromLabelsToLabelAdapters(lbls), } it := series.Iterator(it) diff --git a/pkg/ingester/matchers.go b/pkg/ingester/matchers.go new file mode 100644 index 00000000000..57be22a82a4 --- /dev/null +++ b/pkg/ingester/matchers.go @@ -0,0 +1,61 @@ +package ingester + +import "slices" + +import "github.com/prometheus/prometheus/model/labels" + +// optimizeMatchers categorizes input matchers to matchers used in select and matchers applied lazily +// when scanning series. +func optimizeMatchers(matchers []*labels.Matcher) ([]*labels.Matcher, []*labels.Matcher) { + // If there is only 1 matcher, use it for select. + // If there is no matcher to optimize, also return early. + if len(matchers) < 2 || !canOptimizeMatchers(matchers) { + return matchers, nil + } + selectMatchers := make([]*labels.Matcher, 0, len(matchers)) + lazyMatchers := make([]*labels.Matcher, 0) + for _, m := range matchers { + // =~.* is a noop as it matches everything. + if m.Type == labels.MatchRegexp && m.Value == ".*" { + continue + } + if lazyMatcher(m) { + lazyMatchers = append(lazyMatchers, m) + continue + } + selectMatchers = append(selectMatchers, m) + } + + // We need at least 1 select matcher. + if len(selectMatchers) == 0 { + selectMatchers = lazyMatchers[:1] + lazyMatchers = lazyMatchers[1:] + } + + return selectMatchers, lazyMatchers +} + +func canOptimizeMatchers(matchers []*labels.Matcher) bool { + return slices.ContainsFunc(matchers, lazyMatcher) +} + +func labelsMatches(lbls labels.Labels, matchers []*labels.Matcher) bool { + for _, m := range matchers { + if !m.Matches(lbls.Get(m.Name)) { + return false + } + } + return true +} + +// lazyMatcher checks if the label matcher should be applied lazily when scanning series instead of fetching postings +// for matcher. The matchers to apply lazily are matchers that are known to have low selectivity. +func lazyMatcher(matcher *labels.Matcher) bool { + if matcher.Value == ".+" && matcher.Type == labels.MatchRegexp { + return true + } + if matcher.Value == "" && (matcher.Type == labels.MatchNotEqual || matcher.Type == labels.MatchNotRegexp) { + return true + } + return false +} diff --git a/pkg/ingester/matchers_test.go b/pkg/ingester/matchers_test.go new file mode 100644 index 00000000000..eaf1e44cb42 --- /dev/null +++ b/pkg/ingester/matchers_test.go @@ -0,0 +1,312 @@ +package ingester + +import ( + "testing" + + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/assert" +) + +// matcherEqual compares two matchers for equality +func matcherEqual(a, b *labels.Matcher) bool { + if a == nil && b == nil { + return true + } + if a == nil || b == nil { + return false + } + return a.Type == b.Type && a.Name == b.Name && a.Value == b.Value +} + +// matchersEqual compares two slices of matchers for equality +func matchersEqual(a, b []*labels.Matcher) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if !matcherEqual(a[i], b[i]) { + return false + } + } + return true +} + +func TestOptimizeMatchers(t *testing.T) { + tests := map[string]struct { + input []*labels.Matcher + expectedSelect []*labels.Matcher + expectedLazy []*labels.Matcher + }{ + "single matcher returns as select": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + }, + expectedLazy: nil, + }, + "no matchers returns empty": { + input: []*labels.Matcher{}, + expectedSelect: []*labels.Matcher{}, + expectedLazy: nil, + }, + "no optimization needed returns all as select": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expectedLazy: nil, + }, + "no optimization needed with .* regex returns all as select": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "job", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "job", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expectedLazy: nil, + }, + "filters out noop regex .* when optimization is needed": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "job", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + labels.MustNewMatcher(labels.MatchRegexp, "env", ".+"), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expectedLazy: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "env", ".+"), + }, + }, + "moves lazy matchers to lazy category": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchRegexp, "instance", ".+"), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + }, + expectedLazy: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "instance", ".+"), + }, + }, + "moves not equal empty string to lazy": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchNotEqual, "instance", ""), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + }, + expectedLazy: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchNotEqual, "instance", ""), + }, + }, + "moves not regex empty string to lazy": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchNotRegexp, "instance", ""), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + }, + expectedLazy: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchNotRegexp, "instance", ""), + }, + }, + "ensures at least one select matcher": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "instance", ".+"), + labels.MustNewMatcher(labels.MatchNotEqual, "job", ""), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "instance", ".+"), + }, + expectedLazy: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchNotEqual, "job", ""), + }, + }, + "complex case with multiple matchers": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchRegexp, "instance", ".*"), // noop + labels.MustNewMatcher(labels.MatchRegexp, "env", ".+"), // lazy + labels.MustNewMatcher(labels.MatchNotEqual, "region", ""), // lazy + labels.MustNewMatcher(labels.MatchEqual, "version", "v1"), + }, + expectedSelect: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchEqual, "version", "v1"), + }, + expectedLazy: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "env", ".+"), + labels.MustNewMatcher(labels.MatchNotEqual, "region", ""), + }, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + selectMatchers, lazyMatchers := optimizeMatchers(testData.input) + + assert.True(t, matchersEqual(testData.expectedSelect, selectMatchers), + "Expected select matchers: %v, got: %v", testData.expectedSelect, selectMatchers) + assert.True(t, matchersEqual(testData.expectedLazy, lazyMatchers), + "Expected lazy matchers: %v, got: %v", testData.expectedLazy, lazyMatchers) + }) + } +} + +func TestCanOptimizeMatchers(t *testing.T) { + tests := map[string]struct { + input []*labels.Matcher + expected bool + }{ + "no lazy matchers returns false": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expected: false, + }, + "has lazy matcher returns true": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchRegexp, "instance", ".+"), + }, + expected: true, + }, + "has not equal empty string returns true": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchNotEqual, "instance", ""), + }, + expected: true, + }, + "has not regex empty string returns true": { + input: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchNotRegexp, "instance", ""), + }, + expected: true, + }, + "empty matchers returns false": { + input: []*labels.Matcher{}, + expected: false, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + result := canOptimizeMatchers(testData.input) + assert.Equal(t, testData.expected, result) + }) + } +} + +func TestLabelsMatches(t *testing.T) { + tests := map[string]struct { + labels labels.Labels + matchers []*labels.Matcher + expected bool + }{ + "all matchers match": { + labels: labels.FromStrings("job", "test", "instance", "server1", "env", "prod"), + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expected: true, + }, + "one matcher does not match": { + labels: labels.FromStrings("job", "test", "instance", "server1"), + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchEqual, "instance", "server2"), + }, + expected: false, + }, + "regex matcher matches": { + labels: labels.FromStrings("job", "test", "instance", "server1"), + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "instance", "server.*"), + }, + expected: true, + }, + "missing label matches not equal": { + labels: labels.FromStrings("job", "test"), + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchNotEqual, "instance", "server1"), + }, + expected: true, + }, + "missing label does not match equal": { + labels: labels.FromStrings("job", "test"), + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + }, + expected: false, + }, + "empty matchers returns true": { + labels: labels.FromStrings("job", "test"), + matchers: []*labels.Matcher{}, + expected: true, + }, + "complex case with multiple matchers": { + labels: labels.FromStrings("job", "test", "instance", "server1", "env", "prod", "version", "v1"), + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "job", "test"), + labels.MustNewMatcher(labels.MatchRegexp, "instance", "server.*"), + labels.MustNewMatcher(labels.MatchNotEqual, "env", "dev"), + labels.MustNewMatcher(labels.MatchNotRegexp, "version", "v2.*"), + }, + expected: true, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + result := labelsMatches(testData.labels, testData.matchers) + assert.Equal(t, testData.expected, result) + }) + } +} + +func TestLazyMatcher(t *testing.T) { + tests := map[string]struct { + matcher *labels.Matcher + expected bool + }{ + "regex .+ is lazy": { + matcher: labels.MustNewMatcher(labels.MatchRegexp, "instance", ".+"), + expected: true, + }, + "not equal empty string is lazy": { + matcher: labels.MustNewMatcher(labels.MatchNotEqual, "instance", ""), + expected: true, + }, + "not regex empty string is lazy": { + matcher: labels.MustNewMatcher(labels.MatchNotRegexp, "instance", ""), + expected: true, + }, + "equal matcher is not lazy": { + matcher: labels.MustNewMatcher(labels.MatchEqual, "instance", "server1"), + expected: false, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + result := lazyMatcher(testData.matcher) + assert.Equal(t, testData.expected, result) + }) + } +} diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index 4f3206c3f62..cc568fb1221 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -4466,6 +4466,12 @@ "type": "boolean", "x-cli-flag": "ingester.disable-chunk-trimming" }, + "enable_matcher_optimization": { + "default": false, + "description": "Enable optimization of label matchers when query chunks. When enabled, matchers with low selectivity such as =~.+ are applied lazily during series scanning instead of being used for postings matching.", + "type": "boolean", + "x-cli-flag": "ingester.enable-matcher-optimization" + }, "ignore_series_limit_for_metric_names": { "description": "Comma-separated list of metric names, for which -ingester.max-series-per-metric and -ingester.max-global-series-per-metric limits will be ignored. Does not affect max-series-per-user or max-global-series-per-metric limits.", "type": "string", From 025d167f1dd9e1cd0fbc08c61508b261dec56e2a Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Thu, 23 Oct 2025 07:06:10 +0000 Subject: [PATCH 2/6] changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fc7022eea1..ff1734be163 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,7 @@ * [ENHANCEMENT] Ingester: Add `cortex_ingester_tsdb_head_stale_series` metric to keep track of number of stale series on head. #7071 * [ENHANCEMENT] Expose more Go runtime metrics. #7070 * [ENHANCEMENT] Distributor: Filter out label with empty value. #7069 +* [ENHANCEMENT] Ingester: Add `enable_matcher_optimization` config to apply low selectivity matchers lazily. #7063 * [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517 * [BUGFIX] Ingester: Fix labelset data race condition. #6573 * [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576 From 99964af19e5759d37d20a291063d2b4562eb2121 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Thu, 23 Oct 2025 00:11:09 -0700 Subject: [PATCH 3/6] lint Signed-off-by: yeya24 --- pkg/ingester/matchers.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/ingester/matchers.go b/pkg/ingester/matchers.go index 57be22a82a4..18792511d02 100644 --- a/pkg/ingester/matchers.go +++ b/pkg/ingester/matchers.go @@ -1,8 +1,10 @@ package ingester -import "slices" +import ( + "slices" -import "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/labels" +) // optimizeMatchers categorizes input matchers to matchers used in select and matchers applied lazily // when scanning series. From e114b6e05dcc5eaf939aa65cfd672f12d1548de0 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Fri, 24 Oct 2025 00:27:23 -0700 Subject: [PATCH 4/6] add benchmark Signed-off-by: yeya24 --- pkg/ingester/ingester_test.go | 114 ++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index d8313aac3e4..61a51f60920 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -42,6 +42,7 @@ import ( "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/shipper" storecache "github.com/thanos-io/thanos/pkg/store/cache" + "github.com/thanos-io/thanos/pkg/store/storepb" "github.com/weaveworks/common/httpgrpc" "github.com/weaveworks/common/middleware" "github.com/weaveworks/common/user" @@ -3941,6 +3942,119 @@ func BenchmarkIngester_QueryStream_Chunks(b *testing.B) { } } +func BenchmarkIngester_QueryStreamChunks_MatcherOptimization(b *testing.B) { + tests := map[string]struct { + matchers []*labels.Matcher + description string + }{ + "metric name with regex matchers": { + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_metric"), + labels.MustNewMatcher(labels.MatchRegexp, "region", ".+"), + labels.MustNewMatcher(labels.MatchRegexp, "job", ".+"), + }, + description: "Metric name with .+ regex matchers", + }, + "metric name with not equal empty": { + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_metric"), + labels.MustNewMatcher(labels.MatchNotEqual, "env", ""), + labels.MustNewMatcher(labels.MatchNotEqual, "pod", ""), + }, + description: "Metric name with != \"\" matchers", + }, + "complex matchers": { + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_metric"), + labels.MustNewMatcher(labels.MatchRegexp, "region", ".+"), + labels.MustNewMatcher(labels.MatchRegexp, "job", ".+"), + labels.MustNewMatcher(labels.MatchRegexp, "env", ".+"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".+"), + }, + description: "Complex matchers with .+ regex", + }, + } + + for testName, testData := range tests { + b.Run(testName+"_optimization_disabled", func(b *testing.B) { + benchmarkQueryStreamChunksWithMatcherOptimization(b, false, testData.matchers, testData.description+" without optimization") + }) + b.Run(testName+"_optimization_enabled", func(b *testing.B) { + benchmarkQueryStreamChunksWithMatcherOptimization(b, true, testData.matchers, testData.description+" with optimization") + }) + } +} + +func benchmarkQueryStreamChunksWithMatcherOptimization(b *testing.B, enableMatcherOptimization bool, matchers []*labels.Matcher, description string) { + cfg := defaultIngesterTestConfig(b) + cfg.EnableMatcherOptimization = enableMatcherOptimization + + i, err := prepareIngesterWithBlocksStorage(b, cfg, prometheus.NewRegistry()) + require.NoError(b, err) + require.NoError(b, services.StartAndAwaitRunning(context.Background(), i)) + defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck + + // Wait until it's ACTIVE + test.Poll(b, 1*time.Second, ring.ACTIVE, func() any { + return i.lifecycler.GetState() + }) + + ctx := user.InjectOrgID(context.Background(), userID) + + for s := 0; s < 1000; s++ { + lbls := labels.FromStrings( + labels.MetricName, "test_metric", + "region", fmt.Sprintf("region-%d", s%10), + "job", fmt.Sprintf("job-%d", s%20), + "env", fmt.Sprintf("env-%d", s%5), + "pod", fmt.Sprintf("pod-%d", s%1000), + ) + + samples := make([]cortexpb.Sample, 0, 5) + for t := 0; t < 5; t++ { + samples = append(samples, cortexpb.Sample{ + Value: float64(s + t), + TimestampMs: int64(s*5 + t), + }) + } + + // Create labels slice with same length as samples + labelsSlice := make([]labels.Labels, len(samples)) + for j := range labelsSlice { + labelsSlice[j] = lbls + } + + req := cortexpb.ToWriteRequest(labelsSlice, samples, nil, nil, cortexpb.API) + _, err = i.Push(ctx, req) + require.NoError(b, err) + } + + db, err := i.getTSDB(userID) + require.NoError(b, err) + require.NotNil(b, db) + + mockStream := &mockQueryStreamServer{ctx: ctx} + sm := (&storepb.ShardInfo{ + TotalShards: 0, + }).Matcher(nil) + + b.ReportAllocs() + b.ResetTimer() + + for b.Loop() { + numSeries, numSamples, _, numChunks, err := i.queryStreamChunks( + ctx, db, 0, 5000, matchers, sm, mockStream) + + require.NoError(b, err) + require.Greater(b, numSeries, 0) + require.Greater(b, numSamples, 0) + require.Greater(b, numChunks, 0) + + // Reset the mock stream for next iteration + mockStream.series = mockStream.series[:0] + } +} + func benchmarkQueryStream(b *testing.B, samplesCount, seriesCount int) { cfg := defaultIngesterTestConfig(b) From 5d7adc516035b5aca1a98cb03092d57f2fba6bd9 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Fri, 24 Oct 2025 00:46:13 -0700 Subject: [PATCH 5/6] lint Signed-off-by: yeya24 --- pkg/ingester/ingester_test.go | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index 61a51f60920..33ed8f1292b 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -3963,6 +3963,13 @@ func BenchmarkIngester_QueryStreamChunks_MatcherOptimization(b *testing.B) { }, description: "Metric name with != \"\" matchers", }, + "metric name with sparse label": { + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_metric"), + labels.MustNewMatcher(labels.MatchRegexp, "sparse_label", ".+"), + }, + description: "Metric name with sparse label matcher", + }, "complex matchers": { matchers: []*labels.Matcher{ labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_metric"), @@ -4001,17 +4008,25 @@ func benchmarkQueryStreamChunksWithMatcherOptimization(b *testing.B, enableMatch ctx := user.InjectOrgID(context.Background(), userID) - for s := 0; s < 1000; s++ { - lbls := labels.FromStrings( + for s := range 1000 { + // Create base labels + labelPairs := []string{ labels.MetricName, "test_metric", "region", fmt.Sprintf("region-%d", s%10), "job", fmt.Sprintf("job-%d", s%20), "env", fmt.Sprintf("env-%d", s%5), "pod", fmt.Sprintf("pod-%d", s%1000), - ) + } + + // Add sparse label only for half of the series + if s%2 == 0 { + labelPairs = append(labelPairs, "sparse_label", fmt.Sprintf("sparse-%d", s%50)) + } + + lbls := labels.FromStrings(labelPairs...) samples := make([]cortexpb.Sample, 0, 5) - for t := 0; t < 5; t++ { + for t := range 5 { samples = append(samples, cortexpb.Sample{ Value: float64(s + t), TimestampMs: int64(s*5 + t), From 851c6b10d3d4097554d3b582a2adba8088d2d699 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Sat, 1 Nov 2025 10:51:55 -0700 Subject: [PATCH 6/6] update changelog Signed-off-by: yeya24 --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff1734be163..c0e5c705cba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,9 @@ ## master / unreleased +* [ENHANCEMENT] Ingester: Add `enable_matcher_optimization` config to apply low selectivity matchers lazily. #7063 * [BUGFIX] Compactor: Avoid race condition which allow a grouper to not compact all partitions. #7082 - ## 1.20.0 in progress * [CHANGE] StoreGateway/Alertmanager: Add default 5s connection timeout on client. #6603 @@ -91,7 +91,6 @@ * [ENHANCEMENT] Ingester: Add `cortex_ingester_tsdb_head_stale_series` metric to keep track of number of stale series on head. #7071 * [ENHANCEMENT] Expose more Go runtime metrics. #7070 * [ENHANCEMENT] Distributor: Filter out label with empty value. #7069 -* [ENHANCEMENT] Ingester: Add `enable_matcher_optimization` config to apply low selectivity matchers lazily. #7063 * [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517 * [BUGFIX] Ingester: Fix labelset data race condition. #6573 * [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576