From 586ac52748844c4f2cf68ebf2c25d4517c54cb48 Mon Sep 17 00:00:00 2001 From: Jacob Moody Date: Tue, 28 Oct 2025 13:09:19 -0500 Subject: [PATCH 1/2] Add Priority tag to ApproximateBacklogCount --- service/matching/db.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/service/matching/db.go b/service/matching/db.go index 937798ebbdd..88a8f370548 100644 --- a/service/matching/db.go +++ b/service/matching/db.go @@ -11,6 +11,7 @@ import ( enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" persistencespb "go.temporal.io/server/api/persistence/v1" + "go.temporal.io/server/common/locks" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" @@ -729,10 +730,10 @@ func (db *taskQueueDB) emitBacklogGaugesLocked() { return } - var approximateBacklogCount, totalLag int64 + var totalLag int64 var oldestTime time.Time for _, s := range db.subqueues { - approximateBacklogCount += s.ApproximateBacklogCount + metrics.ApproximateBacklogCount.With(db.metricsHandler).Record(float64(s.ApproximateBacklogCount), metrics.PriorityTag(locks.Priority(s.Key.Priority))) oldestTime = minNonZeroTime(oldestTime, s.oldestTime) // note: this metric is only an estimation for the lag. // taskID in DB may not be continuous, especially when task list ownership changes. @@ -745,7 +746,6 @@ func (db *taskQueueDB) emitBacklogGaugesLocked() { } } - metrics.ApproximateBacklogCount.With(db.metricsHandler).Record(float64(approximateBacklogCount)) if oldestTime.IsZero() { metrics.ApproximateBacklogAgeSeconds.With(db.metricsHandler).Record(0) } else { From 079656c39baf37a6b6b79caa2e8cfca4bf358d94 Mon Sep 17 00:00:00 2001 From: Jacob Moody Date: Wed, 29 Oct 2025 11:18:29 -0500 Subject: [PATCH 2/2] correct metrics tags used for task queue priority and done assume subqueue priority is unique. --- service/matching/db.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/service/matching/db.go b/service/matching/db.go index 88a8f370548..cfeebc001b8 100644 --- a/service/matching/db.go +++ b/service/matching/db.go @@ -5,13 +5,13 @@ import ( "fmt" "math" "slices" + "strconv" "sync" "time" enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" persistencespb "go.temporal.io/server/api/persistence/v1" - "go.temporal.io/server/common/locks" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" @@ -732,8 +732,11 @@ func (db *taskQueueDB) emitBacklogGaugesLocked() { var totalLag int64 var oldestTime time.Time + count := make(map[int32]int64) for _, s := range db.subqueues { - metrics.ApproximateBacklogCount.With(db.metricsHandler).Record(float64(s.ApproximateBacklogCount), metrics.PriorityTag(locks.Priority(s.Key.Priority))) + c := count[s.Key.Priority] + c += s.ApproximateBacklogCount + count[s.Key.Priority] = c oldestTime = minNonZeroTime(oldestTime, s.oldestTime) // note: this metric is only an estimation for the lag. // taskID in DB may not be continuous, especially when task list ownership changes. @@ -746,6 +749,13 @@ func (db *taskQueueDB) emitBacklogGaugesLocked() { } } + for _, v := range count { + priStr := "" + if v > 0 { + priStr = strconv.FormatInt(v, 10) + } + metrics.ApproximateBacklogCount.With(db.metricsHandler).Record(float64(v), metrics.StringTag(metrics.TaskPriorityTagName, priStr)) + } if oldestTime.IsZero() { metrics.ApproximateBacklogAgeSeconds.With(db.metricsHandler).Record(0) } else {