Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
* [ENHANCEMENT] Querier: Support query limits in parquet queryable. #6870
* [ENHANCEMENT] Ring: Add zone label to ring_members metric. #6900
* [ENHANCEMENT] Ingester: Add new metric `cortex_ingester_push_errors_total` to track reasons for ingester request failures. #6901
* [ENHANCEMENT] Ring: Expose `detailed_metrics_enabled` for all rings. Default true. #6926
* [ENHANCEMENT] Parquet Storage: Allow Parquet Queryable to disable fallback to Store Gateway. #6920
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring. #6517
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
Expand Down
6 changes: 6 additions & 0 deletions docs/blocks-storage/compactor.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,12 @@ compactor:
# CLI flag: -compactor.auto-forget-delay
[auto_forget_delay: <duration> | default = 2m]

# Set to true to enable ring detailed metrics. These metrics provide
# detailed information, such as token count and ownership per tenant.
# Disabling them can significantly decrease the number of metrics emitted.
# CLI flag: -compactor.ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Minimum time to wait for ring stability at startup. 0 to disable.
# CLI flag: -compactor.ring.wait-stability-min-duration
[wait_stability_min_duration: <duration> | default = 1m]
Expand Down
6 changes: 6 additions & 0 deletions docs/blocks-storage/store-gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ store_gateway:
# CLI flag: -store-gateway.sharding-ring.keep-instance-in-the-ring-on-shutdown
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]

# Set to true to enable ring detailed metrics. These metrics provide
# detailed information, such as token count and ownership per tenant.
# Disabling them can significantly decrease the number of metrics emitted.
# CLI flag: -store-gateway.sharding-ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Minimum time to wait for ring stability at startup. 0 to disable.
# CLI flag: -store-gateway.sharding-ring.wait-stability-min-duration
[wait_stability_min_duration: <duration> | default = 1m]
Expand Down
30 changes: 30 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,12 @@ sharding_ring:
# CLI flag: -alertmanager.sharding-ring.tokens-file-path
[tokens_file_path: <string> | default = ""]

# Set to true to enable ring detailed metrics. These metrics provide detailed
# information, such as token count and ownership per tenant. Disabling them
# can significantly decrease the number of metrics emitted.
# CLI flag: -alertmanager.sharding-ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Time to sleep when the alertmanager is shutting down. Needs to be close to or
# larger than the KV Store information propagation delay.
# CLI flag: -alertmanager.sharding-ring.final-sleep
Expand Down Expand Up @@ -2527,6 +2533,12 @@ sharding_ring:
# CLI flag: -compactor.auto-forget-delay
[auto_forget_delay: <duration> | default = 2m]

# Set to true to enable ring detailed metrics. These metrics provide detailed
# information, such as token count and ownership per tenant. Disabling them
# can significantly decrease the number of metrics emitted.
# CLI flag: -compactor.ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Minimum time to wait for ring stability at startup. 0 to disable.
# CLI flag: -compactor.ring.wait-stability-min-duration
[wait_stability_min_duration: <duration> | default = 1m]
Expand Down Expand Up @@ -2948,6 +2960,12 @@ ring:
# CLI flag: -distributor.ring.heartbeat-timeout
[heartbeat_timeout: <duration> | default = 1m]

# Set to true to enable ring detailed metrics. These metrics provide detailed
# information, such as token count and ownership per tenant. Disabling them
# can significantly decrease the number of metrics emitted.
# CLI flag: -distributor.ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Name of network interface to read address from.
# CLI flag: -distributor.ring.instance-interface-names
[instance_interface_names: <list of string> | default = [eth0 en0]]
Expand Down Expand Up @@ -5102,6 +5120,12 @@ ring:
# CLI flag: -ruler.ring.tokens-file-path
[tokens_file_path: <string> | default = ""]

# Set to true to enable ring detailed metrics. These metrics provide detailed
# information, such as token count and ownership per tenant. Disabling them
# can significantly decrease the number of metrics emitted.
# CLI flag: -ruler.ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Name of network interface to read address from.
# CLI flag: -ruler.ring.instance-interface-names
[instance_interface_names: <list of string> | default = [eth0 en0]]
Expand Down Expand Up @@ -6121,6 +6145,12 @@ sharding_ring:
# CLI flag: -store-gateway.sharding-ring.keep-instance-in-the-ring-on-shutdown
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]

# Set to true to enable ring detailed metrics. These metrics provide detailed
# information, such as token count and ownership per tenant. Disabling them
# can significantly decrease the number of metrics emitted.
# CLI flag: -store-gateway.sharding-ring.detailed-metrics-enabled
[detailed_metrics_enabled: <boolean> | default = true]

# Minimum time to wait for ring stability at startup. 0 to disable.
# CLI flag: -store-gateway.sharding-ring.wait-stability-min-duration
[wait_stability_min_duration: <duration> | default = 1m]
Expand Down
15 changes: 9 additions & 6 deletions pkg/alertmanager/alertmanager_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@ var SyncRingOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE, ring.JOINING}, fun
// is used to strip down the config to the minimum, and avoid confusion
// to the user.
type RingConfig struct {
KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances."`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
ReplicationFactor int `yaml:"replication_factor"`
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
TokensFilePath string `yaml:"tokens_file_path"`
KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances."`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
ReplicationFactor int `yaml:"replication_factor"`
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
TokensFilePath string `yaml:"tokens_file_path"`
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`

FinalSleep time.Duration `yaml:"final_sleep"`
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
Expand Down Expand Up @@ -88,6 +89,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&cfg.ReplicationFactor, rfprefix+"replication-factor", 3, "The replication factor to use when sharding the alertmanager.")
f.BoolVar(&cfg.ZoneAwarenessEnabled, rfprefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate alerts across different availability zones.")
f.StringVar(&cfg.TokensFilePath, rfprefix+"tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")
f.BoolVar(&cfg.DetailedMetricsEnabled, rfprefix+"detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
Expand Down Expand Up @@ -134,6 +136,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
rc.ReplicationFactor = cfg.ReplicationFactor
rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled

return rc
}
11 changes: 7 additions & 4 deletions pkg/compactor/compactor_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (
// is used to strip down the config to the minimum, and avoid confusion
// to the user.
type RingConfig struct {
KVStore kv.Config `yaml:"kvstore"`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
AutoForgetDelay time.Duration `yaml:"auto_forget_delay"`
KVStore kv.Config `yaml:"kvstore"`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
AutoForgetDelay time.Duration `yaml:"auto_forget_delay"`
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`

// Wait ring stability.
WaitStabilityMinDuration time.Duration `yaml:"wait_stability_min_duration"`
Expand Down Expand Up @@ -55,6 +56,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
cfg.KVStore.RegisterFlagsWithPrefix("compactor.ring.", "collectors/", f)
f.DurationVar(&cfg.HeartbeatPeriod, "compactor.ring.heartbeat-period", 5*time.Second, "Period at which to heartbeat to the ring. 0 = disabled.")
f.DurationVar(&cfg.HeartbeatTimeout, "compactor.ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which compactors are considered unhealthy within the ring. 0 = never (timeout disabled).")
f.BoolVar(&cfg.DetailedMetricsEnabled, "compactor.ring.detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")
f.DurationVar(&cfg.AutoForgetDelay, "compactor.auto-forget-delay", 2*cfg.HeartbeatTimeout, "Time since last heartbeat before compactor will be removed from ring. 0 to disable")

// Wait stability flags.
Expand Down Expand Up @@ -89,6 +91,7 @@ func (cfg *RingConfig) ToLifecyclerConfig() ring.LifecyclerConfig {
rc.KVStore = cfg.KVStore
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
rc.ReplicationFactor = 1
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled

// Configure lifecycler
lc.RingConfig = rc
Expand Down
9 changes: 6 additions & 3 deletions pkg/distributor/distributor_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ import (
// is used to strip down the config to the minimum, and avoid confusion
// to the user.
type RingConfig struct {
KVStore kv.Config `yaml:"kvstore"`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
KVStore kv.Config `yaml:"kvstore"`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`

// Instance details
InstanceID string `yaml:"instance_id" doc:"hidden"`
Expand All @@ -44,6 +45,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
cfg.KVStore.RegisterFlagsWithPrefix("distributor.ring.", "collectors/", f)
f.DurationVar(&cfg.HeartbeatPeriod, "distributor.ring.heartbeat-period", 5*time.Second, "Period at which to heartbeat to the ring. 0 = disabled.")
f.DurationVar(&cfg.HeartbeatTimeout, "distributor.ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which distributors are considered unhealthy within the ring. 0 = never (timeout disabled).")
f.BoolVar(&cfg.DetailedMetricsEnabled, "distributor.ring.detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
Expand Down Expand Up @@ -94,6 +96,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
rc.KVStore = cfg.KVStore
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
rc.ReplicationFactor = 1
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled

return rc
}
15 changes: 9 additions & 6 deletions pkg/ruler/ruler_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ var ListRuleRingOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE, ring.LEAVING},
// is used to strip down the config to the minimum, and avoid confusion
// to the user.
type RingConfig struct {
KVStore kv.Config `yaml:"kvstore"`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
ReplicationFactor int `yaml:"replication_factor"`
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
TokensFilePath string `yaml:"tokens_file_path"`
KVStore kv.Config `yaml:"kvstore"`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period"`
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
ReplicationFactor int `yaml:"replication_factor"`
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
TokensFilePath string `yaml:"tokens_file_path"`
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`

// Instance details
InstanceID string `yaml:"instance_id" doc:"hidden"`
Expand Down Expand Up @@ -77,6 +78,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&cfg.ReplicationFactor, "ruler.ring.replication-factor", 1, "EXPERIMENTAL: The replication factor to use when loading rule groups for API HA.")
f.BoolVar(&cfg.ZoneAwarenessEnabled, "ruler.ring.zone-awareness-enabled", false, "EXPERIMENTAL: True to enable zone-awareness and load rule groups across different availability zones for API HA.")
f.StringVar(&cfg.TokensFilePath, "ruler.ring.tokens-file-path", "", "EXPERIMENTAL: File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")
f.BoolVar(&cfg.DetailedMetricsEnabled, "ruler.ring.detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
Expand Down Expand Up @@ -119,6 +121,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
rc.HeartbeatTimeout = cfg.HeartbeatTimeout
rc.SubringCacheDisabled = true
rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled

// Each rule group is evaluated by *exactly* one ruler, but it can be loaded by multiple rulers for API HA
rc.ReplicationFactor = cfg.ReplicationFactor
Expand Down
3 changes: 3 additions & 0 deletions pkg/storegateway/gateway_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type RingConfig struct {
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
KeepInstanceInTheRingOnShutdown bool `yaml:"keep_instance_in_the_ring_on_shutdown"`
ZoneStableShuffleSharding bool `yaml:"zone_stable_shuffle_sharding" doc:"hidden"`
DetailedMetricsEnabled bool `yaml:"detailed_metrics_enabled"`

// Wait ring stability.
WaitStabilityMinDuration time.Duration `yaml:"wait_stability_min_duration"`
Expand Down Expand Up @@ -107,6 +108,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.BoolVar(&cfg.ZoneAwarenessEnabled, ringFlagsPrefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate blocks across different availability zones.")
f.BoolVar(&cfg.KeepInstanceInTheRingOnShutdown, ringFlagsPrefix+"keep-instance-in-the-ring-on-shutdown", false, "True to keep the store gateway instance in the ring when it shuts down. The instance will then be auto-forgotten from the ring after 10*heartbeat_timeout.")
f.BoolVar(&cfg.ZoneStableShuffleSharding, ringFlagsPrefix+"zone-stable-shuffle-sharding", true, "If true, use zone stable shuffle sharding algorithm. Otherwise, use the default shuffle sharding algorithm.")
f.BoolVar(&cfg.DetailedMetricsEnabled, ringFlagsPrefix+"detailed-metrics-enabled", true, "Set to true to enable ring detailed metrics. These metrics provide detailed information, such as token count and ownership per tenant. Disabling them can significantly decrease the number of metrics emitted.")

// Wait stability flags.
f.DurationVar(&cfg.WaitStabilityMinDuration, ringFlagsPrefix+"wait-stability-min-duration", time.Minute, "Minimum time to wait for ring stability at startup. 0 to disable.")
Expand Down Expand Up @@ -138,6 +140,7 @@ func (cfg *RingConfig) ToRingConfig() ring.Config {
rc.ReplicationFactor = cfg.ReplicationFactor
rc.ZoneAwarenessEnabled = cfg.ZoneAwarenessEnabled
rc.SubringCacheDisabled = true
rc.DetailedMetricsEnabled = cfg.DetailedMetricsEnabled

return rc
}
Expand Down
Loading