Commit 0247eba

alxric authored and siavashs committed
feat(dispatch): add start delay
This change adds a new command-line flag, `--dispatch.start-delay`, which corresponds to the `--rules.alert.resend-delay` flag in Prometheus. The Prometheus flag controls the minimum amount of time Prometheus waits before resending an alert to Alertmanager. By adding this value to Alertmanager's start time, we delay the aggregation groups' first flush until we are confident that all alerts have been resent by the Prometheus instances. This should help avoid race conditions in inhibitions after a (re)start.

Other improvements:
- remove the hasFlushed flag from aggrGroup
- remove mutex locking from aggrGroup

Signed-off-by: Alexander Rickardsson <[email protected]>
Signed-off-by: Siavash Safi <[email protected]>
1 parent 92ecf8b commit 0247eba
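
To illustrate the behavior described in the commit message, here is a minimal Go sketch of the gating idea: the first flush of an aggregation group is deferred until the process start time plus the configured `--dispatch.start-delay` has passed. The `gateFirstFlush` helper and the one-minute delay are assumptions for illustration only and are not part of this commit.

package main

import (
	"fmt"
	"time"
)

func main() {
	// startTime mirrors the package-level variable added in cmd/alertmanager/main.go.
	startTime := time.Now()

	// Hypothetical value; in the real binary it would come from --dispatch.start-delay
	// and should match Prometheus' --rules.alert.resend-delay.
	startDelay := time.Minute

	minDispatchTime := startTime.Add(startDelay)

	// gateFirstFlush is a hypothetical helper: it returns how long an aggregation
	// group's first flush should be deferred. Zero means "flush on the normal
	// schedule"; a positive value means "wait until the start delay has elapsed".
	gateFirstFlush := func(now time.Time) time.Duration {
		if now.After(minDispatchTime) {
			return 0
		}
		return time.Until(minDispatchTime)
	}

	fmt.Println("defer first flush by:", gateFirstFlush(time.Now()))
}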

File tree

8 files changed: +281 -71 lines

cmd/alertmanager/main.go

Lines changed: 18 additions & 3 deletions

@@ -64,6 +64,7 @@ import (
 )

 var (
+	startTime = time.Now()
 	requestDuration = promauto.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Name: "alertmanager_http_request_duration_seconds",
@@ -143,6 +144,7 @@ func run() int {
 		maxSilenceSizeBytes = kingpin.Flag("silences.max-silence-size-bytes", "Maximum silence size in bytes. If negative or zero, no limit is set.").Default("0").Int()
 		alertGCInterval = kingpin.Flag("alerts.gc-interval", "Interval between alert GC.").Default("30m").Duration()
 		dispatchMaintenanceInterval = kingpin.Flag("dispatch.maintenance-interval", "Interval between maintenance of aggregation groups in the dispatcher.").Default("30s").Duration()
+		DispatchStartDelay = kingpin.Flag("dispatch.start-delay", "Minimum amount of time to wait before dispatching alerts. This option should be synced with value of --rules.alert.resend-delay on Prometheus.").Default("0s").Duration()

 		webConfig = webflag.AddFlags(kingpin.CommandLine, ":9093")
 		externalURL = kingpin.Flag("web.external-url", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.").String()
@@ -413,7 +415,7 @@ func run() int {
 		prometheus.DefaultRegisterer,
 		configLogger,
 	)
-	configCoordinator.Subscribe(func(conf *config.Config) error {
+	configCoordinator.Subscribe(func(conf *config.Config, initial bool) error {
 		tmpl, err = template.FromGlobs(conf.Templates)
 		if err != nil {
 			return fmt.Errorf("failed to parse templates: %w", err)
@@ -491,7 +493,17 @@ func run() int {
 			silencer.Mutes(labels)
 		})

-		disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, *dispatchMaintenanceInterval, nil, logger, dispMetrics)
+		disp = dispatch.NewDispatcher(
+			alerts,
+			routes,
+			pipeline,
+			marker,
+			timeoutFunc,
+			*dispatchMaintenanceInterval,
+			nil,
+			logger,
+			dispMetrics,
+		)
 		routes.Walk(func(r *dispatch.Route) {
 			if r.RouteOpts.RepeatInterval > *retention {
 				configLogger.Warn(
@@ -518,7 +530,10 @@ func run() int {
 			}
 		})

-		go disp.Run()
+		go disp.Run(
+			startTime.Add(*DispatchStartDelay),
+			initial, // signal to the dispatcher that this is the initial config load
+		)
 		go inhibitor.Run()

 		return nil

config/coordinator.go

Lines changed: 9 additions & 5 deletions

@@ -30,9 +30,10 @@ type Coordinator struct {
 	logger *slog.Logger

 	// Protects config and subscribers
-	mutex       sync.Mutex
-	config      *Config
-	subscribers []func(*Config) error
+	mutex         sync.Mutex
+	config        *Config
+	subscribers   []func(*Config, bool) error
+	initialReload bool

 	configHashMetric prometheus.Gauge
 	configSuccessMetric prometheus.Gauge
@@ -46,6 +47,7 @@ func NewCoordinator(configFilePath string, r prometheus.Registerer, l *slog.Logg
 	c := &Coordinator{
 		configFilePath: configFilePath,
 		logger: l,
+		initialReload: true,
 	}

 	c.registerMetrics(r)
@@ -73,7 +75,7 @@ func (c *Coordinator) registerMetrics(r prometheus.Registerer) {
 }

 // Subscribe subscribes the given Subscribers to configuration changes.
-func (c *Coordinator) Subscribe(ss ...func(*Config) error) {
+func (c *Coordinator) Subscribe(ss ...func(*Config, bool) error) {
 	c.mutex.Lock()
 	defer c.mutex.Unlock()

@@ -82,11 +84,13 @@ func (c *Coordinator) Subscribe(ss ...func(*Config) error) {

 func (c *Coordinator) notifySubscribers() error {
 	for _, s := range c.subscribers {
-		if err := s(c.config); err != nil {
+		if err := s(c.config, c.initialReload); err != nil {
 			return err
 		}
 	}

+	// Set initialReload to false after the first notification.
+	c.initialReload = false
 	return nil
 }

config/coordinator_test.go

Lines changed: 2 additions & 2 deletions

@@ -49,7 +49,7 @@ func TestCoordinatorRegistersMetrics(t *testing.T) {
 func TestCoordinatorNotifiesSubscribers(t *testing.T) {
 	callBackCalled := false
 	c := NewCoordinator("testdata/conf.good.yml", prometheus.NewRegistry(), promslog.NewNopLogger())
-	c.Subscribe(func(*Config) error {
+	c.Subscribe(func(*Config, bool) error {
 		callBackCalled = true
 		return nil
 	})
@@ -68,7 +68,7 @@ func TestCoordinatorFailReloadWhenSubscriberFails(t *testing.T) {
 	errMessage := "something happened"
 	c := NewCoordinator("testdata/conf.good.yml", prometheus.NewRegistry(), promslog.NewNopLogger())

-	c.Subscribe(func(*Config) error {
+	c.Subscribe(func(*Config, bool) error {
 		return errors.New(errMessage)
 	})

dispatch/dispatch.go

Lines changed: 77 additions & 37 deletions

@@ -32,6 +32,12 @@ import (
 	"github.com/prometheus/alertmanager/types"
 )

+const (
+	DISPATCHER_STATE_UNKNOWN = iota
+	DISPATCHER_STATE_STARTING
+	DISPATCHER_STATE_RUNNING
+)
+
 // DispatcherMetrics represents metrics associated to a dispatcher.
 type DispatcherMetrics struct {
 	aggrGroups prometheus.Gauge
@@ -90,6 +96,9 @@ type Dispatcher struct {
 	cancel func()

 	logger *slog.Logger
+
+	minDispatchTime time.Time
+	state int
 }

 // Limits describes limits used by Dispatcher.
@@ -102,39 +111,47 @@ type Limits interface {

 // NewDispatcher returns a new Dispatcher.
 func NewDispatcher(
-	ap provider.Alerts,
-	r *Route,
-	s notify.Stage,
-	mk types.GroupMarker,
-	to func(time.Duration) time.Duration,
-	mi time.Duration,
-	lim Limits,
-	l *slog.Logger,
-	m *DispatcherMetrics,
+	alerts provider.Alerts,
+	route *Route,
+	stage notify.Stage,
+	marker types.GroupMarker,
+	timeout func(time.Duration) time.Duration,
+	maintenanceInterval time.Duration,
+	limits Limits,
+	logger *slog.Logger,
+	metrics *DispatcherMetrics,
 ) *Dispatcher {
-	if lim == nil {
-		lim = nilLimits{}
+	if limits == nil {
+		limits = nilLimits{}
 	}

 	disp := &Dispatcher{
-		alerts: ap,
-		stage: s,
-		route: r,
-		marker: mk,
-		timeout: to,
-		maintenanceInterval: mi,
-		logger: l.With("component", "dispatcher"),
-		metrics: m,
-		limits: lim,
+		alerts: alerts,
+		stage: stage,
+		route: route,
+		marker: marker,
+		timeout: timeout,
+		maintenanceInterval: maintenanceInterval,
+		logger: logger.With("component", "dispatcher"),
+		metrics: metrics,
+		limits: limits,
+		state: DISPATCHER_STATE_STARTING,
 	}
 	return disp
 }

 // Run starts dispatching alerts incoming via the updates channel.
-func (d *Dispatcher) Run() {
+func (d *Dispatcher) Run(minDispatchTime time.Time, initial bool) {
 	d.done = make(chan struct{})

 	d.mtx.Lock()
+	d.minDispatchTime = minDispatchTime
+	// If this is not the initial run, set the state to running already.
+	d.logger.Debug("Starting dispatcher", "initialRun", initial)
+	if !initial {
+		d.logger.Debug("Setting dispatcher state to 'running'")
+		d.state = DISPATCHER_STATE_RUNNING
+	}
 	d.aggrGroupsPerRoute = map[*Route]map[model.Fingerprint]*aggrGroup{}
 	d.aggrGroupsNum = 0
 	d.metrics.aggrGroups.Set(0)
@@ -347,6 +364,31 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) {
 	// alert is already there.
 	ag.insert(alert)

+	// Check dispatcher and alert state to determine if we should start dispatching the alert.
+	now := time.Now()
+	switch d.state {
+	case DISPATCHER_STATE_STARTING:
+		if now.After(d.minDispatchTime) {
+			d.logger.Debug("Setting dispatch state to: running")
+			d.state = DISPATCHER_STATE_RUNNING
+		} else {
+			// Reset timer to the time when the dispatcher will be in running state.
+			ag.resetTimer(time.Until(d.minDispatchTime))
+		}
+	case DISPATCHER_STATE_RUNNING:
+		if alert.StartsAt.Before(now) {
+			ag.logger.Debug(
+				"Alert is old enough for immediate flush, resetting timer to zero",
+				"alert", alert.Name(),
+				"fingerprint", alert.Fingerprint(),
+				"startsAt", alert.StartsAt,
+			)
+			ag.resetTimer(0)
+		}
+	default:
+		d.logger.Warn("Unknown dispatcher state", "state", d.state)
+	}
+
 	go ag.run(func(ctx context.Context, alerts ...*types.Alert) bool {
 		_, _, err := d.stage.Exec(ctx, d.logger, alerts...)
 		if err != nil {
@@ -392,13 +434,17 @@ type aggrGroup struct {
 	done chan struct{}
 	next *time.Timer
 	timeout func(time.Duration) time.Duration
-
-	mtx sync.RWMutex
-	hasFlushed bool
 }

 // newAggrGroup returns a new aggregation group.
-func newAggrGroup(ctx context.Context, labels model.LabelSet, r *Route, to func(time.Duration) time.Duration, marker types.AlertMarker, logger *slog.Logger) *aggrGroup {
+func newAggrGroup(
+	ctx context.Context,
+	labels model.LabelSet,
+	r *Route,
+	to func(time.Duration) time.Duration,
+	marker types.AlertMarker,
+	logger *slog.Logger,
+) *aggrGroup {
 	if to == nil {
 		to = func(d time.Duration) time.Duration { return d }
 	}
462508
ctx = notify.WithRouteID(ctx, ag.routeID)
463509

464510
// Wait the configured interval before calling flush again.
465-
ag.mtx.Lock()
466-
ag.next.Reset(ag.opts.GroupInterval)
467-
ag.hasFlushed = true
468-
ag.mtx.Unlock()
511+
ag.resetTimer(ag.opts.GroupInterval)
469512

470513
ag.flush(func(alerts ...*types.Alert) bool {
471514
return nf(ctx, alerts...)
@@ -486,19 +529,16 @@ func (ag *aggrGroup) stop() {
486529
<-ag.done
487530
}
488531

532+
// resetTimer resets the timer for the AG.
533+
func (ag *aggrGroup) resetTimer(t time.Duration) {
534+
ag.next.Reset(t)
535+
}
536+
489537
// insert inserts the alert into the aggregation group.
490538
func (ag *aggrGroup) insert(alert *types.Alert) {
491539
if err := ag.alerts.Set(alert); err != nil {
492540
ag.logger.Error("error on set alert", "err", err)
493541
}
494-
495-
// Immediately trigger a flush if the wait duration for this
496-
// alert is already over.
497-
ag.mtx.Lock()
498-
defer ag.mtx.Unlock()
499-
if !ag.hasFlushed && alert.StartsAt.Add(ag.opts.GroupWait).Before(time.Now()) {
500-
ag.next.Reset(0)
501-
}
502542
}
503543

504544
func (ag *aggrGroup) empty() bool {

0 commit comments

Comments
 (0)