11 changes: 10 additions & 1 deletion common/metrics/defs.go
@@ -2197,6 +2197,10 @@ const (
PersistenceFailures
PersistenceLatency
PersistenceLatencyHistogram
// Deprecated: replaced with PersistenceLatencyHistogram, but kept for a while for backwards compatibility reasons.
// The buckets are very similar to PersistenceLatencyHistogram, but they cannot be subset like other exponential histograms.
// This will be removed in a later server version.
PersistenceLatencyManualHistogram
PersistenceErrShardExistsCounter
PersistenceErrShardOwnershipLostCounter
PersistenceErrConditionFailedCounter
@@ -2218,7 +2222,9 @@ const (
PersistenceRequestsPerShard
PersistenceFailuresPerDomain
PersistenceLatencyPerDomain
PersistenceLatencyPerDomainHistogram
PersistenceLatencyPerShard
PersistenceLatencyPerShardHistogram
PersistenceErrShardExistsCounterPerDomain
PersistenceErrShardOwnershipLostCounterPerDomain
PersistenceErrConditionFailedCounterPerDomain
@@ -2975,7 +2981,8 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{
PersistenceRequests: {metricName: "persistence_requests", metricType: Counter},
PersistenceFailures: {metricName: "persistence_errors", metricType: Counter},
PersistenceLatency: {metricName: "persistence_latency", metricType: Timer},
PersistenceLatencyHistogram: {metricName: "persistence_latency_histogram", metricType: Histogram, buckets: PersistenceLatencyBuckets},
PersistenceLatencyHistogram: {metricName: "persistence_latency_ns", metricType: Histogram, exponentialBuckets: Default1ms100s},
PersistenceLatencyManualHistogram: {metricName: "persistence_latency_histogram", metricType: Histogram, buckets: PersistenceLatencyBuckets},
PersistenceErrShardExistsCounter: {metricName: "persistence_errors_shard_exists", metricType: Counter},
PersistenceErrShardOwnershipLostCounter: {metricName: "persistence_errors_shard_ownership_lost", metricType: Counter},
PersistenceErrConditionFailedCounter: {metricName: "persistence_errors_condition_failed", metricType: Counter},
@@ -2996,7 +3003,9 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{
PersistenceRequestsPerShard: {metricName: "persistence_requests_per_shard", metricType: Counter},
PersistenceFailuresPerDomain: {metricName: "persistence_errors_per_domain", metricRollupName: "persistence_errors", metricType: Counter},
PersistenceLatencyPerDomain: {metricName: "persistence_latency_per_domain", metricRollupName: "persistence_latency", metricType: Timer},
PersistenceLatencyPerDomainHistogram: {metricName: "persistence_latency_per_domain_ns", metricType: Histogram, exponentialBuckets: Low1ms100s},
PersistenceLatencyPerShard: {metricName: "persistence_latency_per_shard", metricType: Timer},
PersistenceLatencyPerShardHistogram: {metricName: "persistence_latency_per_shard_ns", metricType: Histogram, exponentialBuckets: Low1ms100s},
PersistenceErrShardExistsCounterPerDomain: {metricName: "persistence_errors_shard_exists_per_domain", metricRollupName: "persistence_errors_shard_exists", metricType: Counter},
PersistenceErrShardOwnershipLostCounterPerDomain: {metricName: "persistence_errors_shard_ownership_lost_per_domain", metricRollupName: "persistence_errors_shard_ownership_lost", metricType: Counter},
PersistenceErrConditionFailedCounterPerDomain: {metricName: "persistence_errors_condition_failed_per_domain", metricRollupName: "persistence_errors_condition_failed", metricType: Counter},
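
The new *_ns definitions switch from the hand-maintained PersistenceLatencyBuckets list to shared exponential bucket sets (Default1ms100s for the overall metric, Low1ms100s for the per-domain and per-shard variants). Those sets are defined elsewhere in this package; the sketch below is only an illustration, with made-up bucket counts and a hypothetical helper, of how exponential buckets spanning 1ms to 100s can be derived and why a coarser set is an exact subset of a finer one when their growth factors are powers of each other — which a fixed manual bucket list does not guarantee.

package main

import (
    "fmt"
    "math"
    "time"
)

// exponentialBuckets is a hypothetical helper, not the Cadence implementation:
// it returns count upper bounds growing by a constant factor from min to max.
func exponentialBuckets(min, max time.Duration, count int) []time.Duration {
    factor := math.Pow(float64(max)/float64(min), 1/float64(count-1))
    buckets := make([]time.Duration, count)
    for i := range buckets {
        buckets[i] = time.Duration(float64(min) * math.Pow(factor, float64(i)))
    }
    return buckets
}

func main() {
    // With 25 and 13 buckets over the same range, the second growth factor is
    // the square of the first, so every boundary of the coarse set also appears
    // in the fine set (modulo rounding) and the two can be merged or compared.
    fine := exponentialBuckets(time.Millisecond, 100*time.Second, 25)
    coarse := exponentialBuckets(time.Millisecond, 100*time.Second, 13)
    fmt.Println(len(fine), fine[0], fine[len(fine)-1])
    fmt.Println(len(coarse), coarse[0], coarse[len(coarse)-1])
}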
10 changes: 5 additions & 5 deletions common/metrics/defs_test.go
@@ -96,27 +96,27 @@ func TestScopeDefsMapped(t *testing.T) {
func TestMetricDefsMapped(t *testing.T) {
for i := CadenceRequests; i < NumCommonMetrics; i++ {
key, ok := MetricDefs[Common][i]
require.True(t, ok)
require.True(t, ok, "common enum %v is missing a metric definition", i)
require.NotEmpty(t, key)
}
for i := TaskRequests; i < NumHistoryMetrics; i++ {
key, ok := MetricDefs[History][i]
require.True(t, ok)
require.True(t, ok, "history enum %v is missing a metric definition", i)
require.NotEmpty(t, key)
}
for i := PollSuccessPerTaskListCounter; i < NumMatchingMetrics; i++ {
key, ok := MetricDefs[Matching][i]
require.True(t, ok)
require.True(t, ok, "matching enum %v is missing a metric definition", i)
require.NotEmpty(t, key)
}
for i := ReplicatorMessages; i < NumWorkerMetrics; i++ {
key, ok := MetricDefs[Worker][i]
require.True(t, ok)
require.True(t, ok, "worker enum %v is missing a metric definition", i)
require.NotEmpty(t, key)
}
for i := ShardDistributorRequests; i < NumShardDistributorMetrics; i++ {
key, ok := MetricDefs[ShardDistributor][i]
require.True(t, ok)
require.True(t, ok, "shard distributor enum %v is missing a metric definition", i)
require.NotEmpty(t, key)
}
}
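
The only change here is the added message arguments, so a missing MetricDefs entry now reports which enum value has no definition instead of failing with a bare assertion. A minimal, self-contained illustration of the pattern (it fails on purpose; the map and index are made up, not the real MetricDefs):

package metrics_test

import (
    "testing"

    "github.com/stretchr/testify/require"
)

// TestMissingDefinitionMessage mimics the guard in TestMetricDefsMapped with a
// toy map: index 2 has no entry, so the failure message names the offending
// enum value via the fmt-style arguments passed to require.True.
func TestMissingDefinitionMessage(t *testing.T) {
    defs := map[int]string{1: "persistence_requests"}
    for i := 1; i <= 2; i++ {
        _, ok := defs[i]
        require.True(t, ok, "common enum %v is missing a metric definition", i)
    }
}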
28 changes: 20 additions & 8 deletions common/persistence/wrappers/metered/base.go
@@ -135,12 +135,14 @@ func (p *base) call(scope metrics.ScopeIdx, op func() error, tags ...metrics.Tag
duration := time.Since(before)
if len(tags) > 0 {
metricsScope.RecordTimer(metrics.PersistenceLatencyPerDomain, duration)
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyPerDomainHistogram, duration)
} else {
metricsScope.RecordTimer(metrics.PersistenceLatency, duration)
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyHistogram, duration)
}

if p.enableLatencyHistogramMetrics {
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyHistogram, duration)
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyManualHistogram, duration)
}

logger := p.logger.Helper()
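
After this hunk (and the matching change in callWithoutDomainTag below), the timer and the new exponential *_ns histogram are always emitted, while only the deprecated manual-bucket histogram remains behind the enableLatencyHistogramMetrics flag. Because the unchanged context lines make it easy to misread which RecordHistogramDuration call is now unconditional, here is a compact stand-in sketch of that decision, using made-up names rather than the real Scope API:

package main

import (
    "fmt"
    "time"
)

// emitLatency is a stand-in for the latency-recording tail of call(): it only
// shows which metrics fire unconditionally after this change and which one
// stays behind the flag. It is not the Cadence API.
func emitLatency(perDomain, manualHistogramEnabled bool, d time.Duration) {
    emit := func(name string) { fmt.Printf("%s <- %v\n", name, d) }

    if perDomain {
        emit("persistence_latency_per_domain (timer)")
        emit("persistence_latency_per_domain_ns (exponential histogram)") // new, always emitted
    } else {
        emit("persistence_latency (timer)")
        emit("persistence_latency_ns (exponential histogram)") // new, always emitted
    }
    if manualHistogramEnabled {
        emit("persistence_latency_histogram (manual buckets, deprecated)") // still flag-gated
    }
}

func main() {
    emitLatency(true, false, 42*time.Millisecond)
}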
@@ -161,9 +163,10 @@ func (p *base) callWithoutDomainTag(scope metrics.ScopeIdx, op func() error, tag
err := op()
duration := time.Since(before)
metricsScope.RecordTimer(metrics.PersistenceLatency, duration)
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyHistogram, duration)

if p.enableLatencyHistogramMetrics {
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyHistogram, duration)
metricsScope.RecordHistogramDuration(metrics.PersistenceLatencyManualHistogram, duration)
}
if err != nil {
p.updateErrorMetric(scope, err, metricsScope, p.logger.Helper())
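
The new histogram metric names end in _ns, which suggests — an assumption, since the Scope implementation is outside this diff — that RecordHistogramDuration records the elapsed time as a nanosecond value rather than relying on the reporter's timer unit. A small sketch of that assumed behaviour with a stand-in recorder interface:

package main

import (
    "fmt"
    "time"
)

// histogramRecorder stands in for whatever the metrics Scope drives underneath;
// it is an assumption for illustration, not the Cadence metrics API.
type histogramRecorder interface {
    RecordValue(v float64)
}

type printRecorder struct{ name string }

func (p printRecorder) RecordValue(v float64) { fmt.Printf("%s <- %.0f\n", p.name, v) }

// recordHistogramDuration shows what RecordHistogramDuration is assumed to do
// for the *_ns metrics: convert the duration to nanoseconds and record it
// against the exponential buckets.
func recordHistogramDuration(h histogramRecorder, d time.Duration) {
    h.RecordValue(float64(d.Nanoseconds()))
}

func main() {
    recordHistogramDuration(printRecorder{name: "persistence_latency_ns"}, 42*time.Millisecond)
}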
@@ -172,24 +175,33 @@ }
}

func (p *base) callWithDomainAndShardScope(scope metrics.ScopeIdx, op func() error, domainTag metrics.Tag, shardIDTag metrics.Tag, additionalTags ...metrics.Tag) error {
// caution: additionalTags is generally unsafe in Prometheus, as it varies the tags applied to a metric.
// this method should be changed to use specific tags, and always emit the same list, using empty values where necessary.
overallScope := p.metricClient.Scope(scope)
domainMetricsScope := p.metricClient.Scope(scope, append([]metrics.Tag{domainTag}, additionalTags...)...)
shardOperationsMetricsScope := p.metricClient.Scope(scope, append([]metrics.Tag{shardIDTag}, additionalTags...)...)
shardOverallMetricsScope := p.metricClient.Scope(metrics.PersistenceShardRequestCountScope, shardIDTag)
shardOverallMetricsScope := p.metricClient.Scope(metrics.PersistenceShardRequestCountScope, shardIDTag) // operation:shardidpersistencerequest

domainMetricsScope.IncCounter(metrics.PersistenceRequestsPerDomain)
domainMetricsScope.IncCounter(metrics.PersistenceRequestsPerDomain) // also emits PersistenceRequests
shardOperationsMetricsScope.IncCounter(metrics.PersistenceRequestsPerShard)
shardOverallMetricsScope.IncCounter(metrics.PersistenceRequestsPerShard)

before := time.Now()
err := op()
duration := time.Since(before)

domainMetricsScope.RecordTimer(metrics.PersistenceLatencyPerDomain, duration)
shardOperationsMetricsScope.RecordTimer(metrics.PersistenceLatencyPerShard, duration)
shardOverallMetricsScope.RecordTimer(metrics.PersistenceLatencyPerShard, duration)
domainMetricsScope.RecordTimer(metrics.PersistenceLatencyPerDomain, duration) // also emits PersistenceLatency
domainMetricsScope.RecordHistogramDuration(metrics.PersistenceLatencyPerDomainHistogram, duration)
overallScope.RecordHistogramDuration(metrics.PersistenceLatencyHistogram, duration)

shardOperationsMetricsScope.RecordTimer(metrics.PersistenceLatencyPerShard, duration) // operation:{scope argument}
shardOperationsMetricsScope.RecordHistogramDuration(metrics.PersistenceLatencyPerShardHistogram, duration)

shardOverallMetricsScope.RecordTimer(metrics.PersistenceLatencyPerShard, duration) // operation:shardidpersistencerequest
shardOverallMetricsScope.RecordHistogramDuration(metrics.PersistenceLatencyPerShardHistogram, duration)

if p.enableLatencyHistogramMetrics {
domainMetricsScope.RecordHistogramDuration(metrics.PersistenceLatencyHistogram, duration)
domainMetricsScope.RecordHistogramDuration(metrics.PersistenceLatencyManualHistogram, duration) // manual buckets, being deprecated
}
if err != nil {
p.updateErrorMetricPerDomain(scope, err, domainMetricsScope, p.logger.Helper())
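
The caution added at the top of callWithDomainAndShardScope deserves a concrete illustration: with a Prometheus-style backend, each metric name is expected to carry one consistent label set, so letting additionalTags vary the tags means the same series name is sometimes emitted with extra labels and sometimes without them. Below is a minimal sketch of the fix the comment suggests, assuming nothing about Cadence's tag helpers (the workflow_type label and the domain value are made up):

package main

import "fmt"

// tag stands in for metrics.Tag.
type tag struct{ key, value string }

// varyingTags mirrors the current additionalTags behaviour: the label set of
// the resulting series depends on what the caller happens to pass in.
func varyingTags(extra ...tag) []tag {
    return append([]tag{{"domain", "orders"}}, extra...)
}

// fixedTags always emits the same label set and uses an empty value when the
// caller has nothing to add, which is what the comment in this hunk proposes.
func fixedTags(workflowType string) []tag {
    return []tag{
        {"domain", "orders"},
        {"workflow_type", workflowType},
    }
}

func main() {
    fmt.Println(varyingTags())                          // one label
    fmt.Println(varyingTags(tag{"workflow_type", "x"})) // two labels: inconsistent series
    fmt.Println(fixedTags(""))                          // always two labels
    fmt.Println(fixedTags("x"))
}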