Skip to content

Commit cfca43b

Browse files
authored
Set default conn timeout of 5 sec for storeGateway and alertmanager clients (#6603)
* Set default conn timeout for SG calls Signed-off-by: Daniel Deluiggi <[email protected]> * Changelog Signed-off-by: Daniel Deluiggi <[email protected]> * Add 5s to alertmanager Signed-off-by: Daniel Deluiggi <[email protected]> * Docs Signed-off-by: Daniel Deluiggi <[email protected]> * Changelog Signed-off-by: Daniel Deluiggi <[email protected]> * Rollback description change Signed-off-by: Daniel Deluiggi <[email protected]> --------- Signed-off-by: Daniel Deluiggi <[email protected]>
1 parent 148a82a commit cfca43b

File tree

5 files changed

+22
-0
lines changed

5 files changed

+22
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Changelog
22

33
## master / unreleased
4+
* [CHANGE] StoreGateway/Alertmanager: Add default 5s connection timeout on client. #6603
45
* [FEATURE] Query Frontend: Add dynamic interval size for query splitting. This is enabled by configuring experimental flags `querier.max-shards-per-query` and/or `querier.max-fetched-data-duration-per-query`. The split interval size is dynamically increased to maintain a number of shards and total duration fetched below the configured values. #6458
56
* [FEATURE] Querier/Ruler: Add `query_partial_data` and `rules_partial_data` limits to allow queries/rules to be evaluated with data from a single zone, if other zones are not available. #6526
67
* [FEATURE] Update prometheus alertmanager version to v0.28.0 and add new integration msteamsv2, jira, and rocketchat. #6590

docs/blocks-storage/querier.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ querier:
222222
# CLI flag: -querier.store-gateway-client.healthcheck.timeout
223223
[timeout: <duration> | default = 1s]
224224

225+
# The maximum amount of time to establish a connection. A value of 0 means
226+
# using default gRPC client connect timeout 5s.
227+
# CLI flag: -querier.store-gateway-client.connect-timeout
228+
[connect_timeout: <duration> | default = 5s]
229+
225230
# If enabled, store gateway query stats will be logged using `info` log level.
226231
# CLI flag: -querier.store-gateway-query-stats-enabled
227232
[store_gateway_query_stats: <boolean> | default = true]

docs/configuration/config-file-reference.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,11 @@ alertmanager_client:
503503
# CLI flag: -alertmanager.alertmanager-client.grpc-max-send-msg-size
504504
[max_send_msg_size: <int> | default = 4194304]
505505
506+
# The maximum amount of time to establish a connection. A value of 0 means
507+
# using default gRPC client connect timeout 5s.
508+
# CLI flag: -alertmanager.alertmanager-client.connect-timeout
509+
[connect_timeout: <duration> | default = 5s]
510+
506511
# The interval between persisting the current alertmanager state (notification
507512
# log and silences) to object storage. This is only used when sharding is
508513
# enabled. This state is read when all replicas for a shard can not be
@@ -4100,6 +4105,11 @@ store_gateway_client:
41004105
# CLI flag: -querier.store-gateway-client.healthcheck.timeout
41014106
[timeout: <duration> | default = 1s]
41024107
4108+
# The maximum amount of time to establish a connection. A value of 0 means
4109+
# using default gRPC client connect timeout 5s.
4110+
# CLI flag: -querier.store-gateway-client.connect-timeout
4111+
[connect_timeout: <duration> | default = 5s]
4112+
41034113
# If enabled, store gateway query stats will be logged using `info` log level.
41044114
# CLI flag: -querier.store-gateway-query-stats-enabled
41054115
[store_gateway_query_stats: <boolean> | default = true]

pkg/alertmanager/alertmanager_client.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ type ClientConfig struct {
4040
GRPCCompression string `yaml:"grpc_compression"`
4141
MaxRecvMsgSize int `yaml:"max_recv_msg_size"`
4242
MaxSendMsgSize int `yaml:"max_send_msg_size"`
43+
ConnectTimeout time.Duration `yaml:"connect_timeout"`
4344
}
4445

4546
// RegisterFlagsWithPrefix registers flags with prefix.
@@ -50,6 +51,7 @@ func (cfg *ClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet)
5051
cfg.TLS.RegisterFlagsWithPrefix(prefix, f)
5152
f.IntVar(&cfg.MaxRecvMsgSize, prefix+".grpc-max-recv-msg-size", 16*1024*1024, "gRPC client max receive message size (bytes).")
5253
f.IntVar(&cfg.MaxSendMsgSize, prefix+".grpc-max-send-msg-size", 4*1024*1024, "gRPC client max send message size (bytes).")
54+
f.DurationVar(&cfg.ConnectTimeout, prefix+".connect-timeout", 5*time.Second, "The maximum amount of time to establish a connection. A value of 0 means using default gRPC client connect timeout 5s.")
5355
}
5456

5557
type alertmanagerClientsPool struct {
@@ -67,6 +69,7 @@ func newAlertmanagerClientsPool(discovery client.PoolServiceDiscovery, amClientC
6769
BackoffOnRatelimits: false,
6870
TLSEnabled: amClientCfg.TLSEnabled,
6971
TLS: amClientCfg.TLS,
72+
ConnectTimeout: amClientCfg.ConnectTimeout,
7073
}
7174

7275
requestDuration := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{

pkg/querier/store_gateway_client.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ func newStoreGatewayClientPool(discovery client.PoolServiceDiscovery, clientConf
7979
BackoffOnRatelimits: false,
8080
TLSEnabled: clientConfig.TLSEnabled,
8181
TLS: clientConfig.TLS,
82+
ConnectTimeout: clientConfig.ConnectTimeout,
8283
},
8384
HealthCheckConfig: clientConfig.HealthCheckConfig,
8485
}
@@ -103,11 +104,13 @@ type ClientConfig struct {
103104
TLS tls.ClientConfig `yaml:",inline"`
104105
GRPCCompression string `yaml:"grpc_compression"`
105106
HealthCheckConfig grpcclient.HealthCheckConfig `yaml:"healthcheck_config" doc:"description=EXPERIMENTAL: If enabled, gRPC clients perform health checks for each target and fail the request if the target is marked as unhealthy."`
107+
ConnectTimeout time.Duration `yaml:"connect_timeout"`
106108
}
107109

108110
func (cfg *ClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
109111
f.BoolVar(&cfg.TLSEnabled, prefix+".tls-enabled", cfg.TLSEnabled, "Enable TLS for gRPC client connecting to store-gateway.")
110112
f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", "Use compression when sending messages. Supported values are: 'gzip', 'snappy' and '' (disable compression)")
113+
f.DurationVar(&cfg.ConnectTimeout, prefix+".connect-timeout", 5*time.Second, "The maximum amount of time to establish a connection. A value of 0 means using default gRPC client connect timeout 5s.")
111114
cfg.TLS.RegisterFlagsWithPrefix(prefix, f)
112115
cfg.HealthCheckConfig.RegisterFlagsWithPrefix(prefix, f)
113116
}

0 commit comments

Comments
 (0)