@@ -11,6 +11,7 @@ import (
1111 "github.com/uber/cadence/common/log"
1212 "github.com/uber/cadence/common/log/tag"
1313 "github.com/uber/cadence/common/metrics"
14+ "github.com/uber/cadence/common/types"
1415)
1516
1617const (
@@ -53,10 +54,11 @@ func NewMitigator(
 	options *MitigatorOptions,
 ) Mitigator {
 	m := &mitigatorImpl{
-		monitor:      monitor,
-		logger:       logger,
-		metricsScope: metricsScope,
-		options:      options,
+		virtualQueueManager: virtualQueueManager,
+		monitor:             monitor,
+		logger:              logger,
+		metricsScope:        metricsScope,
+		options:             options,
 	}
 	m.handlers = map[AlertType]func(Alert){
 		AlertTypeQueuePendingTaskCount: m.handleQueuePendingTaskCount,
@@ -83,17 +85,49 @@ func (m *mitigatorImpl) handleQueuePendingTaskCount(alert Alert) {
 		virtualQueue.UpdateAndGetState()
 	}
 	if m.monitor.GetTotalPendingTaskCount() <= alert.AlertAttributesQueuePendingTaskCount.CriticalPendingTaskCount {
+		m.logger.Debug("mitigating queue alert, skip mitigation because the alert is no longer valid")
 		return
 	}
 	// Second, getting the stats of pending tasks. We need:
 	stats := m.collectPendingTaskStats()

 	// Third, find virtual slices to split given the target pending task count and the stats of pending tasks
 	targetPendingTaskCount := int(float64(alert.AlertAttributesQueuePendingTaskCount.CriticalPendingTaskCount) * targetLoadFactor)
+	if m.logger.DebugOn() {
+		sliceStatesPerDomain := make(map[string][]*types.VirtualSliceState)
+		for domain, slices := range stats.slicesPerDomain {
+			for _, s := range slices {
+				sliceStatesPerDomain[domain] = append(sliceStatesPerDomain[domain], ToPersistenceVirtualSliceState(s.GetState()))
+			}
+		}
+		for s, domainStats := range stats.pendingTaskCountPerDomainPerSlice {
+			m.logger.Debug("mitigating queue alert, get task stats per slice", tag.Dynamic("slice", ToPersistenceVirtualSliceState(s.GetState())), tag.Dynamic("domain-stats", domainStats))
+		}
+		m.logger.Debug("mitigating queue alert, get task stats",
+			tag.AlertType(int(alert.AlertType)),
+			tag.Dynamic("pending-task-count-per-domain", stats.pendingTaskCountPerDomain),
+			tag.Dynamic("slices-per-domain", sliceStatesPerDomain),
+			tag.Dynamic("pending-task-count", stats.totalPendingTaskCount),
+			tag.Dynamic("target-task-count", targetPendingTaskCount),
+		)
+	}
 	domainsToClearPerSlice := m.findDomainsToClear(stats, targetPendingTaskCount)
+	if m.logger.DebugOn() {
+		for s, domains := range domainsToClearPerSlice {
+			m.logger.Debug("mitigating queue alert, get domains to clear", tag.Dynamic("slice", ToPersistenceVirtualSliceState(s.GetState())), tag.WorkflowDomainIDs(domains))
+		}
+	}

 	// Finally, split and clear the slices
 	m.processQueueSplitsAndClear(virtualQueues, domainsToClearPerSlice)
+	if m.logger.DebugOn() {
+		virtualQueues := m.virtualQueueManager.VirtualQueues()
+		state := make(map[int64]*types.VirtualQueueState)
+		for queueID, vq := range virtualQueues {
+			state[queueID] = ToPersistenceVirtualQueueState(vq.GetState())
+		}
+		m.logger.Debug("mitigating queue alert, get queue state after mitigation", tag.Dynamic("queue-state", state))
+	}
 }

 // The stats of pending tasks are used to calculate the domains to clear. We need:
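For context on the pattern this diff introduces: the per-slice and per-queue state maps are only built when debug logging is actually enabled, so the conversion and allocation cost is not paid on the normal mitigation path. Below is a minimal, self-contained sketch of that guard; the `Logger` interface and `expensiveSnapshot` helper are hypothetical stand-ins for illustration, not the cadence `log` package API.

```go
package main

import "fmt"

// Logger is a hypothetical subset of a structured logger that can report
// whether debug logging is enabled.
type Logger interface {
	DebugOn() bool
	Debug(msg string, tags ...interface{})
}

type stdLogger struct{ debug bool }

func (l stdLogger) DebugOn() bool { return l.debug }
func (l stdLogger) Debug(msg string, tags ...interface{}) {
	fmt.Println(append([]interface{}{msg}, tags...)...)
}

// expensiveSnapshot stands in for building the per-slice / per-domain state
// maps above; it is only worth doing when debug output will be emitted.
func expensiveSnapshot() map[string]int {
	return map[string]int{"domainA": 42, "domainB": 7}
}

func mitigate(logger Logger) {
	// ... mitigation work ...
	if logger.DebugOn() { // skip building the snapshot entirely when debug is off
		logger.Debug("queue state after mitigation", expensiveSnapshot())
	}
}

func main() {
	mitigate(stdLogger{debug: true})
}
```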