Skip to content

Commit e3ddec0

Browse files
committed
chore: add replication task latency histogram
Add an exponential histogram for replication task generation latency in TaskAckManager, alongside the existing timer.
- Dual-emit the ExponentialTaskLatency (task_latency_ns) histogram together with the existing TaskLatency timer in TaskAckManager.getTasks().
- Register task_latency and task_latency_ns in the HistogramMigrationMetrics allowlist for configurable rollout.
Cadence CDNC-17610
1 parent 4e6f35e commit e3ddec0

File tree

3 files changed

+12
-0
lines changed

3 files changed

+12
-0
lines changed

common/metrics/config.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,11 @@ func (h *HistogramMigration) UnmarshalYAML(read func(any) error) error {
4141
// This is likely best done in an `init` func, to ensure it happens early enough
4242
// and does not race with config reading.
4343
var HistogramMigrationMetrics = map[string]struct{}{
44+
// History task generation latency (replication task-ack path).
45+
// Dual-emitted as timer + histogram.
46+
"task_latency": {},
47+
"task_latency_ns": {},
48+
4449
"task_latency_processing": {},
4550
"task_latency_processing_ns": {},
4651

common/metrics/defs.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2480,6 +2480,7 @@ const (
24802480
const (
24812481
TaskRequests = iota + NumCommonMetrics
24822482
TaskLatency
2483+
ExponentialTaskLatency
24832484
TaskFailures
24842485
TaskDiscarded
24852486
TaskAttemptTimer
@@ -3309,6 +3310,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{
33093310
History: {
33103311
TaskRequests: {metricName: "task_requests", metricType: Counter},
33113312
TaskLatency: {metricName: "task_latency", metricType: Timer},
3313+
ExponentialTaskLatency: {metricName: "task_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s},
33123314
TaskAttemptTimer: {metricName: "task_attempt", metricType: Timer},
33133315
TaskFailures: {metricName: "task_errors", metricType: Counter},
33143316
TaskDiscarded: {metricName: "task_errors_discarded", metricType: Counter},

service/history/replication/task_ack_manager.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,8 +131,13 @@ func (t *TaskAckManager) getTasks(ctx context.Context, pollingCluster string, la
131131
lastReadTaskID = previousReadTaskID
132132
}
133133

134+
// Keep timer (backwards compatible), dual-emit exponential histogram for migration.
135+
taskGeneratedStart := t.timeSource.Now()
134136
taskGeneratedTimer := t.scope.StartTimer(metrics.TaskLatency)
135137
defer taskGeneratedTimer.Stop()
138+
defer func() {
139+
t.scope.ExponentialHistogram(metrics.ExponentialTaskLatency, t.timeSource.Since(taskGeneratedStart))
140+
}()
136141

137142
batchSize := t.dynamicTaskBatchSizer.value()
138143
t.scope.UpdateGauge(metrics.ReplicationTasksBatchSize, float64(batchSize))

0 commit comments

Comments
 (0)