Skip to content

Commit c4303a3

Browse files
authored
Expose DetailedMetricsEnabled for all ring configs (cortexproject#6926)
* Expose DetailedMetricsEnabled for all ring configs
  Signed-off-by: Daniel Deluiggi <[email protected]>
* changelog
  Signed-off-by: Daniel Deluiggi <[email protected]>
---------
Signed-off-by: Daniel Deluiggi <[email protected]>
1 parent a9b6c20 commit c4303a3

File tree

9 files changed

+77
-19
lines changed

9 files changed

+77
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
* [ENHANCEMENT] Querier: Support query limits in parquet queryable. #6870
6262
* [ENHANCEMENT] Ring: Add zone label to ring_members metric. #6900
6363
* [ENHANCEMENT] Ingester: Add new metric `cortex_ingester_push_errors_total` to track reasons for ingester request failures. #6901
64+
* [ENHANCEMENT] Ring: Expose `detailed_metrics_enabled` for all rings. Default true. #6926
6465
* [ENHANCEMENT] Parquet Storage: Allow Parquet Queryable to disable fallback to Store Gateway. #6920
6566
* [ENHANCEMENT] Query Frontend: Add a `format_query` label value to the `op` label at `cortex_query_frontend_queries_total` metric. #6925
6667
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517

docs/blocks-storage/compactor.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,12 @@ compactor:
268268
# CLI flag: -compactor.auto-forget-delay
269269
[auto_forget_delay: <duration> | default = 2m]
270270

271+
# Set to true to enable ring detailed metrics. These metrics provide
272+
# detailed information, such as token count and ownership per tenant.
273+
# Disabling them can significantly decrease the number of metrics emitted.
274+
# CLI flag: -compactor.ring.detailed-metrics-enabled
275+
[detailed_metrics_enabled: <boolean> | default = true]
276+
271277
# Minimum time to wait for ring stability at startup. 0 to disable.
272278
# CLI flag: -compactor.ring.wait-stability-min-duration
273279
[wait_stability_min_duration: <duration> | default = 1m]

docs/blocks-storage/store-gateway.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,12 @@ store_gateway:
303303
# CLI flag: -store-gateway.sharding-ring.keep-instance-in-the-ring-on-shutdown
304304
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]
305305

306+
# Set to true to enable ring detailed metrics. These metrics provide
307+
# detailed information, such as token count and ownership per tenant.
308+
# Disabling them can significantly decrease the number of metrics emitted.
309+
# CLI flag: -store-gateway.sharding-ring.detailed-metrics-enabled
310+
[detailed_metrics_enabled: <boolean> | default = true]
311+
306312
# Minimum time to wait for ring stability at startup. 0 to disable.
307313
# CLI flag: -store-gateway.sharding-ring.wait-stability-min-duration
308314
[wait_stability_min_duration: <duration> | default = 1m]

docs/configuration/config-file-reference.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,12 @@ sharding_ring:
529529
# CLI flag: -alertmanager.sharding-ring.tokens-file-path
530530
[tokens_file_path: <string> | default = ""]
531531
532+
# Set to true to enable ring detailed metrics. These metrics provide detailed
533+
# information, such as token count and ownership per tenant. Disabling them
534+
# can significantly decrease the number of metrics emitted.
535+
# CLI flag: -alertmanager.sharding-ring.detailed-metrics-enabled
536+
[detailed_metrics_enabled: <boolean> | default = true]
537+
532538
# How long to sleep when the alertmanager is shutting down. Needs to be close
533539
# to or larger than the KV Store information propagation delay.
534540
# CLI flag: -alertmanager.sharding-ring.final-sleep
@@ -2527,6 +2533,12 @@ sharding_ring:
25272533
# CLI flag: -compactor.auto-forget-delay
25282534
[auto_forget_delay: <duration> | default = 2m]
25292535
2536+
# Set to true to enable ring detailed metrics. These metrics provide detailed
2537+
# information, such as token count and ownership per tenant. Disabling them
2538+
# can significantly decrease the number of metrics emitted.
2539+
# CLI flag: -compactor.ring.detailed-metrics-enabled
2540+
[detailed_metrics_enabled: <boolean> | default = true]
2541+
25302542
# Minimum time to wait for ring stability at startup. 0 to disable.
25312543
# CLI flag: -compactor.ring.wait-stability-min-duration
25322544
[wait_stability_min_duration: <duration> | default = 1m]
@@ -2948,6 +2960,12 @@ ring:
29482960
# CLI flag: -distributor.ring.heartbeat-timeout
29492961
[heartbeat_timeout: <duration> | default = 1m]
29502962
2963+
# Set to true to enable ring detailed metrics. These metrics provide detailed
2964+
# information, such as token count and ownership per tenant. Disabling them
2965+
# can significantly decrease the number of metrics emitted.
2966+
# CLI flag: -distributor.ring.detailed-metrics-enabled
2967+
[detailed_metrics_enabled: <boolean> | default = true]
2968+
29512969
# Name of network interface to read address from.
29522970
# CLI flag: -distributor.ring.instance-interface-names
29532971
[instance_interface_names: <list of string> | default = [eth0 en0]]
@@ -5102,6 +5120,12 @@ ring:
51025120
# CLI flag: -ruler.ring.tokens-file-path
51035121
[tokens_file_path: <string> | default = ""]
51045122

5123+
# Set to true to enable ring detailed metrics. These metrics provide detailed
5124+
# information, such as token count and ownership per tenant. Disabling them
5125+
# can significantly decrease the number of metrics emitted.
5126+
# CLI flag: -ruler.ring.detailed-metrics-enabled
5127+
[detailed_metrics_enabled: <boolean> | default = true]
5128+
51055129
# Name of network interface to read address from.
51065130
# CLI flag: -ruler.ring.instance-interface-names
51075131
[instance_interface_names: <list of string> | default = [eth0 en0]]
@@ -6121,6 +6145,12 @@ sharding_ring:
61216145
# CLI flag: -store-gateway.sharding-ring.keep-instance-in-the-ring-on-shutdown
61226146
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]
61236147
6148+
# Set to true to enable ring detailed metrics. These metrics provide detailed
6149+
# information, such as token count and ownership per tenant. Disabling them
6150+
# can significantly decrease the number of metrics emitted.
6151+
# CLI flag: -store-gateway.sharding-ring.detailed-metrics-enabled
6152+
[detailed_metrics_enabled: <boolean> | default = true]
6153+
61246154
# Minimum time to wait for ring stability at startup. 0 to disable.
61256155
# CLI flag: -store-gateway.sharding-ring.wait-stability-min-duration
61266156
[wait_stability_min_duration: <duration> | default = 1m]

pkg/alertmanager/alertmanager_ring.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,13 @@ var SyncRingOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE, ring.JOINING}, fun
4343
// is used to strip down the config to the minimum, and avoid confusion
4444
// to the user.
4545
type RingConfig struct {
46-
KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances."`
47-
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
48-
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
49-
ReplicationFactor int `yaml:"replication_factor"`
50-
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
51-
TokensFilePath string `yaml:"tokens_file_path"`
46+
KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances."`
47+
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
48+
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
49+
ReplicationFactor int `yaml:"replication_factor"`
50+
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
51+
TokensFilePath string `yaml:"tokens_file_path"`
52+
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`
5253

5354
FinalSleep time.Duration `yaml:"final_sleep"`
5455
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
@@ -88,6 +89,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
8889
f.IntVar(&cfg.ReplicationFactor, rfprefix+"replication-factor", 3, "The replication factor to use when sharding the alertmanager.")
8990
f.BoolVar(&cfg.ZoneAwarenessEnabled, rfprefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate alerts across different availability zones.")
9091
f.StringVar(&cfg.TokensFilePath, rfprefix+"tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")
92+
f.BoolVar(&cfg.DetailedMetricsEnabled, rfprefix+"detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")
9193

9294
// Instance flags
9395
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
@@ -134,6 +136,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
134136
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
135137
rc.ReplicationFactor = cfg.ReplicationFactor
136138
rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled
139+
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled
137140

138141
return rc
139142
}

pkg/compactor/compactor_ring.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@ import (
1818
// is used to strip down the config to the minimum, and avoid confusion
1919
// to the user.
2020
type RingConfig struct {
21-
KVStore kv.Config `yaml:"kvstore"`
22-
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
23-
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
24-
AutoForgetDelay time.Duration `yaml:"auto_forget_delay"`
21+
KVStore kv.Config `yaml:"kvstore"`
22+
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
23+
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
24+
AutoForgetDelay time.Duration `yaml:"auto_forget_delay"`
25+
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`
2526

2627
// Wait ring stability.
2728
WaitStabilityMinDuration time.Duration `yaml:"wait_stability_min_duration"`
@@ -55,6 +56,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
5556
cfg.KVStore.RegisterFlagsWithPrefix("compactor.ring.", "collectors/", f)
5657
f.DurationVar(&cfg.HeartbeatPeriod, "compactor.ring.heartbeat-period", 5*time.Second, "Period at which to heartbeat to the ring. 0 = disabled.")
5758
f.DurationVar(&cfg.HeartbeatTimeout, "compactor.ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which compactors are considered unhealthy within the ring. 0 = never (timeout disabled).")
59+
f.BoolVar(&cfg.DetailedMetricsEnabled, "compactor.ring.detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")
5860
f.DurationVar(&cfg.AutoForgetDelay, "compactor.auto-forget-delay", 2*cfg.HeartbeatTimeout, "Time since last heartbeat before compactor will be removed from ring. 0 to disable")
5961

6062
// Wait stability flags.
@@ -89,6 +91,7 @@ func (cfg *RingConfig) ToLifecyclerConfig() ring.LifecyclerConfig {
8991
rc.KVStore = cfg.KVStore
9092
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
9193
rc.ReplicationFactor = 1
94+
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled
9295

9396
// Configure lifecycler
9497
lc.RingConfig = rc

pkg/distributor/distributor_ring.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ import (
1818
// is used to strip down the config to the minimum, and avoid confusion
1919
// to the user.
2020
type RingConfig struct {
21-
KVStore kv.Config `yaml:"kvstore"`
22-
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
23-
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
21+
KVStore kv.Config `yaml:"kvstore"`
22+
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
23+
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
24+
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`
2425

2526
// Instance details
2627
InstanceID string `yaml:"instance_id" doc:"hidden"`
@@ -44,6 +45,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
4445
cfg.KVStore.RegisterFlagsWithPrefix("distributor.ring.", "collectors/", f)
4546
f.DurationVar(&cfg.HeartbeatPeriod, "distributor.ring.heartbeat-period", 5*time.Second, "Period at which to heartbeat to the ring. 0 = disabled.")
4647
f.DurationVar(&cfg.HeartbeatTimeout, "distributor.ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which distributors are considered unhealthy within the ring. 0 = never (timeout disabled).")
48+
f.BoolVar(&cfg.DetailedMetricsEnabled, "distributor.ring.detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")
4749

4850
// Instance flags
4951
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
@@ -94,6 +96,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
9496
rc.KVStore = cfg.KVStore
9597
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
9698
rc.ReplicationFactor = 1
99+
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled
97100

98101
return rc
99102
}

pkg/ruler/ruler_ring.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,13 @@ var ListRuleRingOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE, ring.LEAVING},
3838
// is used to strip down the config to the minimum, and avoid confusion
3939
// to the user.
4040
type RingConfig struct {
41-
KVStore kv.Config `yaml:"kvstore"`
42-
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
43-
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
44-
ReplicationFactor int `yaml:"replication_factor"`
45-
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
46-
TokensFilePath string `yaml:"tokens_file_path"`
41+
KVStore kv.Config `yaml:"kvstore"`
42+
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
43+
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
44+
ReplicationFactor int `yaml:"replication_factor"`
45+
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
46+
TokensFilePath string `yaml:"tokens_file_path"`
47+
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`
4748

4849
// Instance details
4950
InstanceID string `yaml:"instance_id" doc:"hidden"`
@@ -77,6 +78,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
7778
f.IntVar(&cfg.ReplicationFactor, "ruler.ring.replication-factor", 1, "EXPERIMENTAL: The replication factor to use when loading rule groups for API HA.")
7879
f.BoolVar(&cfg.ZoneAwarenessEnabled, "ruler.ring.zone-awareness-enabled", false, "EXPERIMENTAL: True to enable zone-awareness and load rule groups across different availability zones for API HA.")
7980
f.StringVar(&cfg.TokensFilePath, "ruler.ring.tokens-file-path", "", "EXPERIMENTAL: File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")
81+
f.BoolVar(&cfg.DetailedMetricsEnabled, "ruler.ring.detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")
8082

8183
// Instance flags
8284
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
@@ -119,6 +121,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
119121
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
120122
rc.SubringCacheDisabled = true
121123
rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled
124+
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled
122125

123126
// Each rule group is evaluated by *exactly* one ruler, but it can be loaded by multiple rulers for API HA
124127
rc.ReplicationFactor = cfg.ReplicationFactor

pkg/storegateway/gateway_ring.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ type RingConfig struct {
6868
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
6969
KeepInstanceInTheRingOnShutdown bool `yaml:"keep_instance_in_the_ring_on_shutdown"`
7070
ZoneStableShuffleSharding bool `yaml:"zone_stable_shuffle_sharding" doc:"hidden"`
71+
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`
7172

7273
// Wait ring stability.
7374
WaitStabilityMinDuration time.Duration `yaml:"wait_stability_min_duration"`
@@ -107,6 +108,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
107108
f.BoolVar(&cfg.ZoneAwarenessEnabled, ringFlagsPrefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate blocks across different availability zones.")
108109
f.BoolVar(&cfg.KeepInstanceInTheRingOnShutdown, ringFlagsPrefix+"keep-instance-in-the-ring-on-shutdown", false, "True to keep the store gateway instance in the ring when it shuts down. The instance will then be auto-forgotten from the ring after 10*heartbeat_timeout.")
109110
f.BoolVar(&cfg.ZoneStableShuffleSharding, ringFlagsPrefix+"zone-stable-shuffle-sharding", true, "If true, use zone stable shuffle sharding algorithm. Otherwise, use the default shuffle sharding algorithm.")
111+
f.BoolVar(&cfg.DetailedMetricsEnabled, ringFlagsPrefix+"detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")
110112

111113
// Wait stability flags.
112114
f.DurationVar(&cfg.WaitStabilityMinDuration, ringFlagsPrefix+"wait-stability-min-duration", time.Minute, "Minimum time to wait for ring stability at startup. 0 to disable.")
@@ -138,6 +140,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
138140
rc.ReplicationFactor = cfg.ReplicationFactor
139141
rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled
140142
rc.SubringCacheDisabled = true
143+
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled
141144

142145
return rc
143146
}

0 commit comments

Comments
 (0)