Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* [FEATURE] Querier: Allow choosing PromQL engine via header. #6777
* [FEATURE] Querier: Support for configuring query optimizers and enabling XFunctions in the Thanos engine. #6873
* [FEATURE] Query Frontend: Add support for the `/api/v1/format_query` API for formatting queries. #6893
* [ENHANCEMENT] Ingester: Add `cortex_ingester_tsdb_wal_replay_unknown_refs_total` and `cortex_ingester_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references encountered during WAL/WBL replay. #6945
* [ENHANCEMENT] Ruler: Emit an error message when the rule synchronization fails. #6902
* [ENHANCEMENT] Querier: Support snappy and zstd response compression for `-querier.response-compression` flag. #6848
* [ENHANCEMENT] Tenant Federation: Add a limit on the number of query results when `-tenant-federation.regex-matcher-enabled` is enabled. #6845
Expand Down
21 changes: 21 additions & 0 deletions pkg/ingester/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ const (
// Sample kinds distinguished by the ingester metrics.
// NOTE(review): presumably used as metric label values; the call sites are
// not visible in this hunk — confirm.
const (
	sampleMetricTypeFloat     = "float"
	sampleMetricTypeHistogram = "histogram"
)

// Record kinds used as values of the "type" label on the TSDB WAL/WBL
// replay unknown-refs counters (the tests feed them to WithLabelValues).
const (
	typeSeries     = "series"
	typeSamples    = "samples"
	typeExemplars  = "exemplars"
	typeHistograms = "histograms"
	typeMetadata   = "metadata"
	typeTombstones = "tombstones"
)

type ingesterMetrics struct {
Expand Down Expand Up @@ -330,6 +337,8 @@ type tsdbMetrics struct {
tsdbWALTruncateTotal *prometheus.Desc
tsdbWALTruncateDuration *prometheus.Desc
tsdbWALCorruptionsTotal *prometheus.Desc
tsdbWALReplayUnknownRefsTotal *prometheus.Desc
tsdbWBLReplayUnknownRefsTotal *prometheus.Desc
tsdbWALWritesFailed *prometheus.Desc
tsdbHeadTruncateFail *prometheus.Desc
tsdbHeadTruncateTotal *prometheus.Desc
Expand Down Expand Up @@ -437,6 +446,14 @@ func newTSDBMetrics(r prometheus.Registerer) *tsdbMetrics {
"cortex_ingester_tsdb_wal_corruptions_total",
"Total number of TSDB WAL corruptions.",
nil, nil),
tsdbWALReplayUnknownRefsTotal: prometheus.NewDesc(
"cortex_ingester_tsdb_wal_replay_unknown_refs_total",
"Total number of unknown series references encountered during TSDB WAL replay.",
[]string{"type"}, nil),
tsdbWBLReplayUnknownRefsTotal: prometheus.NewDesc(
"cortex_ingester_tsdb_wbl_replay_unknown_refs_total",
"Total number of unknown series references encountered during TSDB WBL replay.",
[]string{"type"}, nil),
tsdbWALWritesFailed: prometheus.NewDesc(
"cortex_ingester_tsdb_wal_writes_failed_total",
"Total number of TSDB WAL writes that failed.",
Expand Down Expand Up @@ -601,6 +618,8 @@ func (sm *tsdbMetrics) Describe(out chan<- *prometheus.Desc) {
out <- sm.tsdbWALTruncateTotal
out <- sm.tsdbWALTruncateDuration
out <- sm.tsdbWALCorruptionsTotal
out <- sm.tsdbWALReplayUnknownRefsTotal
out <- sm.tsdbWBLReplayUnknownRefsTotal
out <- sm.tsdbWALWritesFailed
out <- sm.tsdbHeadTruncateFail
out <- sm.tsdbHeadTruncateTotal
Expand Down Expand Up @@ -659,6 +678,8 @@ func (sm *tsdbMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCounters(out, sm.tsdbWALTruncateTotal, "prometheus_tsdb_wal_truncations_total")
data.SendSumOfSummaries(out, sm.tsdbWALTruncateDuration, "prometheus_tsdb_wal_truncate_duration_seconds")
data.SendSumOfCounters(out, sm.tsdbWALCorruptionsTotal, "prometheus_tsdb_wal_corruptions_total")
data.SendSumOfCountersWithLabels(out, sm.tsdbWALReplayUnknownRefsTotal, "prometheus_tsdb_wal_replay_unknown_refs_total", "type")
data.SendSumOfCountersWithLabels(out, sm.tsdbWBLReplayUnknownRefsTotal, "prometheus_tsdb_wbl_replay_unknown_refs_total", "type")
data.SendSumOfCounters(out, sm.tsdbWALWritesFailed, "prometheus_tsdb_wal_writes_failed_total")
data.SendSumOfCounters(out, sm.tsdbHeadTruncateFail, "prometheus_tsdb_head_truncations_failed_total")
data.SendSumOfCounters(out, sm.tsdbHeadTruncateTotal, "prometheus_tsdb_head_truncations_total")
Expand Down
40 changes: 40 additions & 0 deletions pkg/ingester/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,18 @@ func TestTSDBMetrics(t *testing.T) {
# TYPE cortex_ingester_tsdb_wal_corruptions_total counter
cortex_ingester_tsdb_wal_corruptions_total 2.676537e+06

# HELP cortex_ingester_tsdb_wal_replay_unknown_refs_total Total number of unknown series references encountered during TSDB WAL replay.
# TYPE cortex_ingester_tsdb_wal_replay_unknown_refs_total counter
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series"} 300
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples"} 303
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="metadata"} 306

# HELP cortex_ingester_tsdb_wbl_replay_unknown_refs_total Total number of unknown series references encountered during TSDB WBL replay.
# TYPE cortex_ingester_tsdb_wbl_replay_unknown_refs_total counter
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars"} 300
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="histograms"} 303
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="tombstones"} 306

# HELP cortex_ingester_tsdb_wal_writes_failed_total Total number of TSDB WAL writes that failed.
# TYPE cortex_ingester_tsdb_wal_writes_failed_total counter
cortex_ingester_tsdb_wal_writes_failed_total 1486965
Expand Down Expand Up @@ -505,6 +517,18 @@ func TestTSDBMetricsWithRemoval(t *testing.T) {
# TYPE cortex_ingester_tsdb_wal_corruptions_total counter
cortex_ingester_tsdb_wal_corruptions_total 2.676537e+06

# HELP cortex_ingester_tsdb_wal_replay_unknown_refs_total Total number of unknown series references encountered during TSDB WAL replay.
# TYPE cortex_ingester_tsdb_wal_replay_unknown_refs_total counter
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series"} 300
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples"} 303
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="metadata"} 306

# HELP cortex_ingester_tsdb_wbl_replay_unknown_refs_total Total number of unknown series references encountered during TSDB WBL replay.
# TYPE cortex_ingester_tsdb_wbl_replay_unknown_refs_total counter
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars"} 300
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="histograms"} 303
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="tombstones"} 306

# HELP cortex_ingester_tsdb_wal_writes_failed_total Total number of TSDB WAL writes that failed.
# TYPE cortex_ingester_tsdb_wal_writes_failed_total counter
cortex_ingester_tsdb_wal_writes_failed_total 1486965
Expand Down Expand Up @@ -883,6 +907,22 @@ func populateTSDBMetrics(base float64) *prometheus.Registry {
})
snapshotReplayErrorTotal.Add(103)

walReplayUnknownRefsTotal := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_wal_replay_unknown_refs_total",
Help: "Total number of unknown series references encountered during WAL replay.",
}, []string{"type"})
walReplayUnknownRefsTotal.WithLabelValues(typeSeries).Add(100)
walReplayUnknownRefsTotal.WithLabelValues(typeSamples).Add(101)
walReplayUnknownRefsTotal.WithLabelValues(typeMetadata).Add(102)

wblReplayUnknownRefsTotal := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_wbl_replay_unknown_refs_total",
Help: "Total number of unknown series references encountered during WBL replay.",
}, []string{"type"})
wblReplayUnknownRefsTotal.WithLabelValues(typeExemplars).Add(100)
wblReplayUnknownRefsTotal.WithLabelValues(typeHistograms).Add(101)
wblReplayUnknownRefsTotal.WithLabelValues(typeTombstones).Add(102)

oooHistogram := promauto.With(r).NewHistogram(prometheus.HistogramOpts{
Name: "prometheus_tsdb_sample_ooo_delta",
Help: "Delta in seconds by which a sample is considered out of order (reported regardless of OOO time window and whether sample is accepted or not).",
Expand Down
Loading