Skip to content

Commit 759e0be

Browse files
committed
fix(metrics): use _counts histogram for replication tasks lag
Switch the replication task cleanup lag histogram from duration (nanosecond) emission to integer counts, and update the metric definitions and the histogram migration allowlist mapping accordingly.

Signed-off-by: Diana Zawadzki <dzawa@live.de>
1 parent 29c6b41 commit 759e0be

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

common/metrics/config.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ var HistogramMigrationMetrics = map[string]struct{}{
4747
// Replication task processor histograms (PR #7685).
4848
// Dual-emitted as timer + histogram.
4949
"replication_tasks_lag": {},
50-
"replication_tasks_lag_ns": {},
50+
"replication_tasks_lag_counts": {},
5151
"replication_tasks_applied_latency": {},
5252
"replication_tasks_applied_latency_ns": {},
5353

common/metrics/defs.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3541,7 +3541,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{
35413541
ReplicationTasksApplied: {metricName: "replication_tasks_applied", metricType: Counter},
35423542
ReplicationTasksFailed: {metricName: "replication_tasks_failed", metricType: Counter},
35433543
ReplicationTasksLag: {metricName: "replication_tasks_lag", metricType: Timer},
3544-
ExponentialReplicationTasksLag: {metricName: "replication_tasks_lag_ns", metricType: Histogram, exponentialBuckets: Mid1ms24h},
3544+
ExponentialReplicationTasksLag: {metricName: "replication_tasks_lag_counts", metricType: Histogram, intExponentialBuckets: Mid1To16k},
35453545
ReplicationTasksLagRaw: {metricName: "replication_tasks_lag_raw", metricType: Timer},
35463546
ReplicationTasksDelay: {metricName: "replication_tasks_delay", metricType: Histogram, buckets: ReplicationTaskDelayBucket},
35473547
ReplicationTasksFetched: {metricName: "replication_tasks_fetched", metricType: Timer},

service/history/replication/task_processor.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -278,12 +278,12 @@ func (p *taskProcessorImpl) cleanupAckedReplicationTasks() error {
278278
persistence.HistoryTaskCategoryReplication,
279279
p.currentCluster,
280280
).GetTaskID()
281-
lag := time.Duration(maxReadLevel - minAckLevel)
281+
lagCount := int(maxReadLevel - minAckLevel)
282282
scope := p.metricsClient.Scope(metrics.ReplicationTaskFetcherScope,
283283
metrics.TargetClusterTag(p.currentCluster),
284284
)
285-
scope.RecordTimer(metrics.ReplicationTasksLag, lag)
286-
scope.ExponentialHistogram(metrics.ExponentialReplicationTasksLag, lag)
285+
scope.RecordTimer(metrics.ReplicationTasksLag, time.Duration(lagCount))
286+
scope.IntExponentialHistogram(metrics.ExponentialReplicationTasksLag, lagCount)
287287
for {
288288
pageSize := p.config.ReplicatorTaskDeleteBatchSize()
289289
resp, err := p.shard.GetExecutionManager().RangeCompleteHistoryTask(

0 commit comments

Comments
 (0)