Skip to content

Commit e708d94

Browse files
authored
Merge pull request grafana#394 from stevesg/josh-alertmanager
Alertmanager: Add sharding configuration.
2 parents b38c8c1 + 0fbcc82 commit e708d94

File tree

3 files changed

+60
-30
lines changed

3 files changed

+60
-30
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
* `CortexAlertmanagerPersistStateFailing`
5656
* `CortexAlertmanagerInitialSyncFailed`
5757
* [ENHANCEMENT] Add support for Azure storage in Alertmanager configuration. #381
58+
* [ENHANCEMENT] Add support for running Alertmanager in sharding mode. #394
5859
* [BUGFIX] Fixed a false positive in the `CortexIngesterHasNotShippedBlocks` alert that occurred when an ingester instance had ingested samples in the past, then received no traffic for a long period, and then started receiving samples again. #308
5960
* [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329
6061
* [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. #335

cortex/alertmanager.libsonnet

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,62 @@
77
local service = $.core.v1.service,
88
local configMap = $.core.v1.configMap,
99

10-
local isHA = $._config.alertmanager.replicas > 1,
10+
// The Alertmanager has three operational modes: 'sharding', 'gossip_multi_replica' and 'gossip_single_replica'.
11+
local haType = if $._config.alertmanager.sharding_enabled then
12+
'sharding'
13+
else if $._config.alertmanager.replicas > 1 then
14+
'gossip_multi_replica'
15+
else
16+
'gossip_single_replica',
17+
// mode holds the settings for the operational mode selected by haType. Its fields are:
18+
// ports: array of container ports used for gossiping.
19+
// args: arguments merged into alertmanager_args and converted to container flags by $.util.mapToFlags.
20+
// flags: flags appended verbatim to the container arguments. For legacy reasons, some of these flags need the `--` prefix.
21+
// service: the service definition exposed as alertmanager_service when the alertmanager is enabled.
22+
local mode = {
23+
sharding: {
24+
ports: [],
25+
args: {
26+
'alertmanager.sharding-enabled': true,
27+
'alertmanager.sharding-ring.store': $._config.alertmanager.ring_store,
28+
'alertmanager.sharding-ring.consul.hostname': $._config.alertmanager.ring_hostname,
29+
'alertmanager.sharding-ring.replication-factor': $._config.alertmanager.ring_replication_factor,
30+
},
31+
flags: [],
32+
service:
33+
$.util.serviceFor($.alertmanager_statefulset) +
34+
service.mixin.spec.withClusterIp('None'),
35+
},
36+
gossip_multi_replica: {
37+
ports: [
38+
$.core.v1.containerPort.newUDP('gossip-udp', $._config.alertmanager.gossip_port),
39+
$.core.v1.containerPort.new('gossip-tcp', $._config.alertmanager.gossip_port),
40+
],
41+
args: {},
42+
flags: [
43+
'--alertmanager.cluster.listen-address=[$(POD_IP)]:%s' % $._config.alertmanager.gossip_port,
44+
'--alertmanager.cluster.peers=%s' % std.join(',', peers),
45+
],
46+
service:
47+
$.util.serviceFor($.alertmanager_statefulset) +
48+
service.mixin.spec.withClusterIp('None'),
49+
},
50+
gossip_single_replica: {
51+
ports: [],
52+
args: {},
53+
flags: ['--alertmanager.cluster.listen-address=""'],
54+
service: $.util.serviceFor($.alertmanager_statefulset),
55+
},
56+
}[haType],
1157
local hasFallbackConfig = std.length($._config.alertmanager.fallback_config) > 0,
12-
local peers = if isHA then
13-
[
14-
'alertmanager-%d.alertmanager.%s.svc.%s.local:%s' % [i, $._config.namespace, $._config.cluster, $._config.alertmanager.gossip_port]
15-
for i in std.range(0, $._config.alertmanager.replicas - 1)
16-
]
17-
else [],
18-
58+
local peers = [
59+
'alertmanager-%d.alertmanager.%s.svc.%s.local:%s' % [i, $._config.namespace, $._config.cluster, $._config.alertmanager.gossip_port]
60+
for i in std.range(0, $._config.alertmanager.replicas - 1)
61+
],
1962
alertmanager_args::
2063
$._config.grpcConfig +
2164
$._config.alertmanagerStorageClientConfig +
65+
mode.args +
2266
{
2367
target: 'alertmanager',
2468
'log.level': 'debug',
@@ -51,24 +95,11 @@
5195
alertmanager_container::
5296
if $._config.alertmanager_enabled then
5397
container.new('alertmanager', $._images.alertmanager) +
54-
container.withPorts(
55-
$.util.defaultPorts +
56-
if isHA then [
57-
$.core.v1.containerPort.newUDP('gossip-udp', $._config.alertmanager.gossip_port),
58-
$.core.v1.containerPort.new('gossip-tcp', $._config.alertmanager.gossip_port),
59-
]
60-
else [],
61-
) +
98+
container.withPorts($.util.defaultPorts + mode.ports) +
6299
container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) +
63100
container.withArgsMixin(
64101
$.util.mapToFlags($.alertmanager_args) +
65-
(
66-
if isHA then
67-
['--alertmanager.cluster.listen-address=[$(POD_IP)]:%s' % $._config.alertmanager.gossip_port] +
68-
['--alertmanager.cluster.peers=%s' % std.join(',', peers)]
69-
else
70-
['-alertmanager.cluster.listen-address=""']
71-
)
102+
mode.flags
72103
) +
73104
container.withVolumeMountsMixin(
74105
[volumeMount.new('alertmanager-data', '/data')] +
@@ -101,11 +132,5 @@
101132
else {},
102133

103134
alertmanager_service:
104-
if $._config.alertmanager_enabled then
105-
if isHA then
106-
$.util.serviceFor($.alertmanager_statefulset) +
107-
service.mixin.spec.withClusterIp('None')
108-
else
109-
$.util.serviceFor($.alertmanager_statefulset)
110-
else {},
135+
if $._config.alertmanager_enabled then mode.service else {},
111136
}

cortex/config.libsonnet

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,12 @@
276276

277277
alertmanager: {
278278
replicas: 3,
279+
sharding_enabled: false,
279280
gossip_port: 9094,
280281
fallback_config: {},
282+
ring_store: 'consul',
283+
ring_hostname: 'consul.%s.svc.cluster.local:8500' % $._config.namespace,
284+
ring_replication_factor: $._config.replication_factor,
281285
},
282286

283287
alertmanager_client_type: error 'you must specify a storage backend type for the alertmanager (azure, gcs, s3, local)',

0 commit comments

Comments
 (0)