
Commit b167dd4

Merge pull request #6104 from yeya24/max-bucket-limit
2 parents: 0edfd7d + a88d237

8 files changed (+219 / -9 lines changed)

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@
 * [FEATURE] Query Frontend: Added a query rejection mechanism to block resource-intensive queries. #6005
 * [FEATURE] OTLP: Support ingesting OTLP exponential metrics as native histograms. #6071
 * [FEATURE] Ingester: Add `ingester.instance-limits.max-inflight-query-requests` to allow limiting ingester concurrent queries. #6081
+* [FEATURE] Distributor: Add `validation.max-native-histogram-buckets` to limit the maximum number of buckets in a single native histogram. The distributor will try to automatically reduce histogram resolution until it is within the bucket limit or resolution cannot be reduced any further. #6104
 * [ENHANCEMENT] rulers: Add support to persist tokens in rulers. #5987
 * [ENHANCEMENT] Query Frontend/Querier: Added store gateway postings touched count and touched size in Querier stats and log in Query Frontend. #5892
 * [ENHANCEMENT] Query Frontend/Querier: Returns `warnings` on prometheus query responses. #5916

docs/configuration/config-file-reference.md

Lines changed: 13 additions & 5 deletions
@@ -3159,11 +3159,13 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
 # e.g. remote_write.write_relabel_configs.
 [metric_relabel_configs: <relabel_config...> | default = []]
 
-# Enables support for exemplars in TSDB and sets the maximum number that will be
-# stored. less than zero means disabled. If the value is set to zero, cortex
-# will fallback to blocks-storage.tsdb.max-exemplars value.
-# CLI flag: -ingester.max-exemplars
-[max_exemplars: <int> | default = 0]
+# Limit on total number of positive and negative buckets allowed in a single
+# native histogram. The resolution of a histogram with more buckets will be
+# reduced until the number of buckets is within the limit. If the limit cannot
+# be reached, the sample will be discarded. 0 means no limit. Enforced at
+# Distributor.
+# CLI flag: -validation.max-native-histogram-buckets
+[max_native_histogram_buckets: <int> | default = 0]
 
 # The maximum number of active series per user, per ingester. 0 to disable.
 # CLI flag: -ingester.max-series-per-user
@@ -3213,6 +3215,12 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
 # CLI flag: -ingester.out-of-order-time-window
 [out_of_order_time_window: <duration> | default = 0s]
 
+# Enables support for exemplars in TSDB and sets the maximum number that will be
+# stored. less than zero means disabled. If the value is set to zero, cortex
+# will fallback to blocks-storage.tsdb.max-exemplars value.
+# CLI flag: -ingester.max-exemplars
+[max_exemplars: <int> | default = 0]
+
 # Maximum number of chunks that can be fetched in a single query from ingesters
 # and long-term storage. This limit is enforced in the querier, ruler and
 # store-gateway. 0 to disable.
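
For illustration only (not part of the diff), the new limit can be set globally in the Cortex limits block, or via the equivalent CLI flag `-validation.max-native-histogram-buckets`. A minimal sketch, using 160 as an arbitrary example value:

limits:
  # Histograms with more than 160 buckets get their resolution reduced;
  # if the limit still cannot be met at the minimum schema, the sample is discarded.
  max_native_histogram_buckets: 160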

pkg/cortexpb/histograms.go

Lines changed: 5 additions & 0 deletions
@@ -15,6 +15,11 @@ package cortexpb
 
 import "github.com/prometheus/prometheus/model/histogram"
 
+const (
+    ExponentialSchemaMax int32 = 8
+    ExponentialSchemaMin int32 = -4
+)
+
 func (h Histogram) IsFloatHistogram() bool {
     _, ok := h.GetCount().(*Histogram_CountFloat)
     return ok

pkg/distributor/distributor.go

Lines changed: 7 additions & 3 deletions
@@ -579,12 +579,16 @@ func (d *Distributor) validateSeries(ts cortexpb.PreallocTimeseries, userID stri
     if len(ts.Histograms) > 0 {
         // Only alloc when data present
         histograms = make([]cortexpb.Histogram, 0, len(ts.Histograms))
-        for _, h := range ts.Histograms {
-            // TODO(yeya24): add other validations for native histogram.
-            // For example, Prometheus scrape has bucket limit and schema check.
+        for i, h := range ts.Histograms {
             if err := validation.ValidateSampleTimestamp(d.validateMetrics, limits, userID, ts.Labels, h.TimestampMs); err != nil {
                 return emptyPreallocSeries, err
             }
+            // TODO(yeya24): add max schema validation for native histogram if needed.
+            convertedHistogram, err := validation.ValidateNativeHistogram(d.validateMetrics, limits, userID, ts.Labels, h)
+            if err != nil {
+                return emptyPreallocSeries, err
+            }
+            ts.Histograms[i] = convertedHistogram
         }
         histograms = append(histograms, ts.Histograms...)
     }

pkg/util/validation/errors.go

Lines changed: 18 additions & 0 deletions
@@ -225,6 +225,24 @@ func newExemplarLabelLengthError(seriesLabels []cortexpb.LabelAdapter, exemplarL
     }
 }
 
+// histogramBucketLimitExceededError is a ValidationError implementation for samples with native histogram
+// exceeding max bucket limit and cannot reduce resolution further to be within the max bucket limit.
+type histogramBucketLimitExceededError struct {
+    series []cortexpb.LabelAdapter
+    limit  int
+}
+
+func newHistogramBucketLimitExceededError(series []cortexpb.LabelAdapter, limit int) ValidationError {
+    return &histogramBucketLimitExceededError{
+        series: series,
+        limit:  limit,
+    }
+}
+
+func (e *histogramBucketLimitExceededError) Error() string {
+    return fmt.Sprintf("native histogram bucket count exceeded for metric (limit: %d) metric: %.200q", e.limit, formatLabelSet(e.series))
+}
+
 // formatLabelSet formats label adapters as a metric name with labels, while preserving
 // label order, and keeping duplicates. If there are multiple "__name__" labels, only
 // first one is used as metric name, other ones will be included as regular labels.

pkg/util/validation/limits.go

Lines changed: 10 additions & 1 deletion
@@ -134,7 +134,7 @@ type Limits struct {
     EnforceMetricName        bool              `yaml:"enforce_metric_name" json:"enforce_metric_name"`
     IngestionTenantShardSize int               `yaml:"ingestion_tenant_shard_size" json:"ingestion_tenant_shard_size"`
     MetricRelabelConfigs     []*relabel.Config `yaml:"metric_relabel_configs,omitempty" json:"metric_relabel_configs,omitempty" doc:"nocli|description=List of metric relabel configurations. Note that in most situations, it is more effective to use metrics relabeling directly in the Prometheus server, e.g. remote_write.write_relabel_configs."`
-    MaxExemplars             int               `yaml:"max_exemplars" json:"max_exemplars"`
+    MaxNativeHistogramBuckets int              `yaml:"max_native_histogram_buckets" json:"max_native_histogram_buckets"`
 
     // Ingester enforced limits.
     // Series
@@ -151,6 +151,8 @@ type Limits struct {
     MaxGlobalMetadataPerMetric int `yaml:"max_global_metadata_per_metric" json:"max_global_metadata_per_metric"`
     // Out-of-order
     OutOfOrderTimeWindow model.Duration `yaml:"out_of_order_time_window" json:"out_of_order_time_window"`
+    // Exemplars
+    MaxExemplars int `yaml:"max_exemplars" json:"max_exemplars"`
 
     // Querier enforced limits.
     MaxChunksPerQuery int `yaml:"max_fetched_chunks_per_query" json:"max_fetched_chunks_per_query"`
@@ -232,6 +234,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
     f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.")
     f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.")
     f.BoolVar(&l.EnforceMetadataMetricName, "validation.enforce-metadata-metric-name", true, "Enforce every metadata has a metric name.")
+    f.IntVar(&l.MaxNativeHistogramBuckets, "validation.max-native-histogram-buckets", 0, "Limit on total number of positive and negative buckets allowed in a single native histogram. The resolution of a histogram with more buckets will be reduced until the number of buckets is within the limit. If the limit cannot be reached, the sample will be discarded. 0 means no limit. Enforced at Distributor.")
 
     f.IntVar(&l.MaxLocalSeriesPerUser, "ingester.max-series-per-user", 5000000, "The maximum number of active series per user, per ingester. 0 to disable.")
     f.IntVar(&l.MaxLocalSeriesPerMetric, "ingester.max-series-per-metric", 50000, "The maximum number of active series per metric name, per ingester. 0 to disable.")
@@ -722,6 +725,12 @@ func (o *Overrides) EnforceMetadataMetricName(userID string) bool {
     return o.GetOverridesForUser(userID).EnforceMetadataMetricName
 }
 
+// MaxNativeHistogramBuckets returns the maximum total number of positive and negative buckets of a single native histogram
+// a user is allowed to store.
+func (o *Overrides) MaxNativeHistogramBuckets(userID string) int {
+    return o.GetOverridesForUser(userID).MaxNativeHistogramBuckets
+}
+
 // MaxLocalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store in a single ingester.
 func (o *Overrides) MaxLocalMetricsWithMetadataPerUser(userID string) int {
     return o.GetOverridesForUser(userID).MaxLocalMetricsWithMetadataPerUser
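
Because the distributor reads the value through `Overrides.MaxNativeHistogramBuckets` (which goes through `GetOverridesForUser`), the limit should also be settable per tenant via the usual runtime overrides file. A hedged sketch, with a hypothetical tenant name and an illustrative value:

overrides:
  tenant-with-high-res-histograms:
    max_native_histogram_buckets: 320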

pkg/util/validation/validate.go

Lines changed: 56 additions & 0 deletions
@@ -52,6 +52,9 @@ const (
     exemplarLabelsTooLong    = "exemplar_labels_too_long"
     exemplarTimestampInvalid = "exemplar_timestamp_invalid"
 
+    // Native Histogram specific validation reasons
+    nativeHistogramBucketCountLimitExceeded = "native_histogram_buckets_exceeded"
+
     // RateLimited is one of the values for the reason to discard samples.
     // Declared here to avoid duplication in ingester and distributor.
     RateLimited = "rate_limited"
@@ -262,6 +265,59 @@ func ValidateMetadata(validateMetrics *ValidateMetrics, cfg *Limits, userID stri
     return nil
 }
 
+func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, userID string, ls []cortexpb.LabelAdapter, histogram cortexpb.Histogram) (cortexpb.Histogram, error) {
+    if limits.MaxNativeHistogramBuckets == 0 {
+        return histogram, nil
+    }
+
+    var (
+        exceedLimit bool
+    )
+    if histogram.IsFloatHistogram() {
+        // Initial check to see if the bucket limit is exceeded or not. If not, we can avoid type casting.
+        exceedLimit = len(histogram.PositiveCounts)+len(histogram.NegativeCounts) > limits.MaxNativeHistogramBuckets
+        if !exceedLimit {
+            return histogram, nil
+        }
+        // Exceed limit.
+        if histogram.Schema <= cortexpb.ExponentialSchemaMin {
+            validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
+            return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
+        }
+        fh := cortexpb.FloatHistogramProtoToFloatHistogram(histogram)
+        for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
+            if fh.Schema <= cortexpb.ExponentialSchemaMin {
+                validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
+                return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
+            }
+            fh = fh.ReduceResolution(fh.Schema - 1)
+        }
+        // If resolution reduced, convert new float histogram to protobuf type again.
+        return cortexpb.FloatHistogramToHistogramProto(histogram.TimestampMs, fh), nil
+    }
+
+    // Initial check to see if bucket limit is exceeded or not. If not, we can avoid type casting.
+    exceedLimit = len(histogram.PositiveDeltas)+len(histogram.NegativeDeltas) > limits.MaxNativeHistogramBuckets
+    if !exceedLimit {
+        return histogram, nil
+    }
+    // Exceed limit.
+    if histogram.Schema <= cortexpb.ExponentialSchemaMin {
+        validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
+        return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
+    }
+    h := cortexpb.HistogramProtoToHistogram(histogram)
+    for len(h.PositiveBuckets)+len(h.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
+        if h.Schema <= cortexpb.ExponentialSchemaMin {
+            validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
+            return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
+        }
+        h = h.ReduceResolution(h.Schema - 1)
+    }
+    // If resolution reduced, convert new histogram to protobuf type again.
+    return cortexpb.HistogramToHistogramProto(histogram.TimestampMs, h), nil
+}
+
 func DeletePerUserValidationMetrics(validateMetrics *ValidateMetrics, userID string, log log.Logger) {
     filter := map[string]string{"user": userID}
 
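The reduction loop in `ValidateNativeHistogram` works because lowering the exponential schema by one merges pairs of adjacent buckets, roughly halving the bucket count, until either the count fits the limit or `ExponentialSchemaMin` (-4) is reached. A standalone sketch of that effect (not part of the diff), reusing the Prometheus `tsdbutil` test helper that the tests below also use; printed values depend on the generated histogram:

package main

import (
    "fmt"

    "github.com/prometheus/prometheus/tsdb/tsdbutil"
)

func main() {
    // GenerateTestHistogram(0) produces 4 positive and 4 negative buckets at schema 1.
    h := tsdbutil.GenerateTestHistogram(0)
    fmt.Println("schema:", h.Schema, "buckets:", len(h.PositiveBuckets)+len(h.NegativeBuckets))

    // Dropping the schema by one merges adjacent buckets, so the count roughly halves.
    // ValidateNativeHistogram repeats this step until the histogram fits the limit.
    h = h.ReduceResolution(h.Schema - 1)
    fmt.Println("schema:", h.Schema, "buckets:", len(h.PositiveBuckets)+len(h.NegativeBuckets))
}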
pkg/util/validation/validate_test.go

Lines changed: 109 additions & 0 deletions
@@ -8,6 +8,8 @@ import (
     "github.com/prometheus/client_golang/prometheus"
     "github.com/prometheus/client_golang/prometheus/testutil"
     "github.com/prometheus/common/model"
+    "github.com/prometheus/prometheus/model/labels"
+    "github.com/prometheus/prometheus/tsdb/tsdbutil"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
     "github.com/weaveworks/common/httpgrpc"
@@ -291,3 +293,110 @@ func TestValidateLabelDuplication(t *testing.T) {
     }, "a")
     assert.Equal(t, expected, actual)
 }
+
+func TestValidateNativeHistogram(t *testing.T) {
+    userID := "fake"
+    lbls := cortexpb.FromLabelsToLabelAdapters(labels.FromStrings("foo", "bar"))
+
+    // Test histogram has 4 positive buckets and 4 negative buckets so 8 in total. Schema set to 1.
+    h := tsdbutil.GenerateTestHistogram(0)
+    fh := tsdbutil.GenerateTestFloatHistogram(0)
+
+    histogramWithSchemaMin := tsdbutil.GenerateTestHistogram(0)
+    histogramWithSchemaMin.Schema = cortexpb.ExponentialSchemaMin
+    floatHistogramWithSchemaMin := tsdbutil.GenerateTestFloatHistogram(0)
+    floatHistogramWithSchemaMin.Schema = cortexpb.ExponentialSchemaMin
+    for _, tc := range []struct {
+        name              string
+        bucketLimit       int
+        histogram         cortexpb.Histogram
+        expectedHistogram cortexpb.Histogram
+        expectedErr       error
+    }{
+        {
+            name:              "no limit, histogram",
+            histogram:         cortexpb.HistogramToHistogramProto(0, h.Copy()),
+            expectedHistogram: cortexpb.HistogramToHistogramProto(0, h.Copy()),
+        },
+        {
+            name:              "no limit, float histogram",
+            histogram:         cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+            expectedHistogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+        },
+        {
+            name:              "within limit, histogram",
+            bucketLimit:       8,
+            histogram:         cortexpb.HistogramToHistogramProto(0, h.Copy()),
+            expectedHistogram: cortexpb.HistogramToHistogramProto(0, h.Copy()),
+        },
+        {
+            name:              "within limit, float histogram",
+            bucketLimit:       8,
+            histogram:         cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+            expectedHistogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+        },
+        {
+            name:              "exceed limit and reduce resolution for 1 level, histogram",
+            bucketLimit:       6,
+            histogram:         cortexpb.HistogramToHistogramProto(0, h.Copy()),
+            expectedHistogram: cortexpb.HistogramToHistogramProto(0, h.Copy().ReduceResolution(0)),
+        },
+        {
+            name:              "exceed limit and reduce resolution for 1 level, float histogram",
+            bucketLimit:       6,
+            histogram:         cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+            expectedHistogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy().ReduceResolution(0)),
+        },
+        {
+            name:              "exceed limit and reduce resolution for 2 levels, histogram",
+            bucketLimit:       4,
+            histogram:         cortexpb.HistogramToHistogramProto(0, h.Copy()),
+            expectedHistogram: cortexpb.HistogramToHistogramProto(0, h.Copy().ReduceResolution(-1)),
+        },
+        {
+            name:              "exceed limit and reduce resolution for 2 levels, float histogram",
+            bucketLimit:       4,
+            histogram:         cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+            expectedHistogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy().ReduceResolution(-1)),
+        },
+        {
+            name:        "exceed limit but cannot reduce resolution further, histogram",
+            bucketLimit: 1,
+            histogram:   cortexpb.HistogramToHistogramProto(0, h.Copy()),
+            expectedErr: newHistogramBucketLimitExceededError(lbls, 1),
+        },
+        {
+            name:        "exceed limit but cannot reduce resolution further, float histogram",
+            bucketLimit: 1,
+            histogram:   cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
+            expectedErr: newHistogramBucketLimitExceededError(lbls, 1),
+        },
+        {
+            name:        "exceed limit but cannot reduce resolution further with min schema, histogram",
+            bucketLimit: 4,
+            histogram:   cortexpb.HistogramToHistogramProto(0, histogramWithSchemaMin.Copy()),
+            expectedErr: newHistogramBucketLimitExceededError(lbls, 4),
+        },
+        {
+            name:        "exceed limit but cannot reduce resolution further with min schema, float histogram",
+            bucketLimit: 4,
+            histogram:   cortexpb.FloatHistogramToHistogramProto(0, floatHistogramWithSchemaMin.Copy()),
+            expectedErr: newHistogramBucketLimitExceededError(lbls, 4),
+        },
+    } {
+        t.Run(tc.name, func(t *testing.T) {
+            reg := prometheus.NewRegistry()
+            validateMetrics := NewValidateMetrics(reg)
+            limits := new(Limits)
+            limits.MaxNativeHistogramBuckets = tc.bucketLimit
+            actualHistogram, actualErr := ValidateNativeHistogram(validateMetrics, limits, userID, lbls, tc.histogram)
+            if tc.expectedErr != nil {
+                require.Equal(t, tc.expectedErr, actualErr)
+                require.Equal(t, float64(1), testutil.ToFloat64(validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID)))
+            } else {
+                require.NoError(t, actualErr)
+                require.Equal(t, tc.expectedHistogram, actualHistogram)
+            }
+        })
+    }
+}
