diff --git a/Makefile b/Makefile index 51bd35f16a6..753970abe46 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ $(BUILD)/goversion-lint: $(BUILD)/fmt: $(BUILD)/codegen # formatting must occur only after all other go-file-modifications are done # $(BUILD)/copyright # $(BUILD)/copyright: $(BUILD)/codegen # must add copyright to generated code, sometimes needs re-formatting -$(BUILD)/codegen: $(BUILD)/thrift $(BUILD)/protoc +$(BUILD)/codegen: $(BUILD)/thrift $(BUILD)/protoc $(BUILD)/metrics $(BUILD)/thrift: $(BUILD)/go_mod_check $(BUILD)/protoc: $(BUILD)/go_mod_check $(BUILD)/go_mod_check: @@ -211,6 +211,9 @@ $(BIN)/protoc-gen-gogofast: go.mod go.work | $(BIN) $(BIN)/protoc-gen-yarpc-go: go.mod go.work | $(BIN) $(call go_mod_build_tool,go.uber.org/yarpc/encoding/protobuf/protoc-gen-yarpc-go) +$(BIN)/metricslint: internal/tools/go.mod go.work $(wildcard internal/tools/metricslint/* internal/tools/metricslint/cmd/*) | $(BIN) + $(call go_build_tool,./metricslint/cmd,metricslint) + $(BUILD)/go_mod_check: go.mod internal/tools/go.mod go.work $Q # generated == used is occasionally important for gomock / mock libs in general. this is not a definite problem if violated though. $Q ./scripts/check-gomod-version.sh github.com/golang/mock/gomock $(if $(verbose),-v) @@ -404,6 +407,11 @@ $(BUILD)/code-lint: $(LINT_SRC) $(BIN)/revive | $(BUILD) fi $Q touch $@ +$(BUILD)/metrics-lint: $(ALL_SRC) $(BIN)/metricslint | $(BUILD) + $Q echo "linting metrics definitions..." + $Q $(BIN_PATH) $(BIN)/metricslint -skip cadence_requests_per_tl,2 -skip cache_hit,2 -skip cache_full,2 -skip cache_miss,2 -skip cross_cluster_fetch_errors,2 ./... + $Q touch $@ + $(BUILD)/goversion-lint: go.work Dockerfile docker/github_actions/Dockerfile${DOCKERFILE_SUFFIX} $Q echo "checking go version..." $Q # intentionally using go.work toolchain, as GOTOOLCHAIN is user-overridable @@ -458,7 +466,7 @@ endef # useful to actually re-run to get output again. 
# reuse the intermediates for simplicity and consistency. lint: ## (Re)run the linter - $(call remake,proto-lint gomod-lint code-lint goversion-lint) + $(call remake,proto-lint gomod-lint code-lint goversion-lint metrics-lint) # intentionally not re-making, it's a bit slow and it's clear when it's unnecessary fmt: $(BUILD)/fmt ## Run `gofmt` / organize imports / etc @@ -547,7 +555,7 @@ tools: $(TOOLS) go-generate: $(BIN)/mockgen $(BIN)/enumer $(BIN)/mockery $(BIN)/gowrap ## Run `go generate` to regen mocks, enums, etc $Q echo "running go generate ./..., this takes a minute or more..." $Q # add our bins to PATH so `go generate` can find them - $Q $(BIN_PATH) go generate $(if $(verbose),-v) ./... + $Q $(BIN_PATH) go generate $(if $(verbose),-v) ./... $Q $(MAKE) --no-print-directory fmt # $Q echo "updating copyright headers" # $Q $(MAKE) --no-print-directory copyright @@ -577,7 +585,7 @@ tidy: ## `go mod tidy` all packages clean: ## Clean build products and SQLite database rm -f $(BINS) rm -Rf $(BUILD) - rm *.db + rm -f *.db $(if \ $(wildcard $(STABLE_BIN)/*), \ $(warning usually-stable build tools still exist, delete the $(STABLE_BIN) folder to rebuild them),) diff --git a/common/membership/resolver.go b/common/membership/resolver.go index 28b5162db21..a06309dd899 100644 --- a/common/membership/resolver.go +++ b/common/membership/resolver.go @@ -217,7 +217,7 @@ func (rpo *MultiringResolver) LookupByAddress(service, address string) (HostInfo return m, nil } } - rpo.metrics.Scope(metrics.ResolverHostNotFoundScope).IncCounter(1) + rpo.metrics.Scope(metrics.ResolverHostNotFoundScope).IncCounter(metrics.RingResolverError) return HostInfo{}, fmt.Errorf("host not found in service %s: %s", service, address) } diff --git a/common/metrics/defs.go b/common/metrics/defs.go index 00214a52649..6918349261c 100644 --- a/common/metrics/defs.go +++ b/common/metrics/defs.go @@ -37,10 +37,12 @@ type ( // metricDefinition contains the definition for a metric metricDefinition
struct { - metricType MetricType // metric type - metricName MetricName // metric name - metricRollupName MetricName // optional. if non-empty, this name must be used for rolled-up version of this metric - buckets tally.Buckets // buckets if we are emitting histograms + metricType MetricType // metric type + metricName MetricName // metric name + metricRollupName MetricName // optional. if non-empty, this name must be used for rolled-up version of this metric + buckets tally.Buckets // buckets if we are emitting histograms + exponentialBuckets histogrammy[SubsettableHistogram] + intExponentialBuckets histogrammy[IntSubsettableHistogram] } // scopeDefinition holds the tag definitions for a scope @@ -53,6 +55,10 @@ type ( ServiceIdx int ) +func (s scopeDefinition) GetOperationString() string { + return s.operation +} + // MetricTypes which are supported const ( Counter MetricType = iota @@ -1068,7 +1074,7 @@ const ( // -- Operation scopes for History service -- const ( // HistoryStartWorkflowExecutionScope tracks StartWorkflowExecution API calls received by service - HistoryStartWorkflowExecutionScope = iota + NumCommonScopes + HistoryStartWorkflowExecutionScope = iota + NumFrontendScopes // HistoryRecordActivityTaskHeartbeatScope tracks RecordActivityTaskHeartbeat API calls received by service HistoryRecordActivityTaskHeartbeatScope // HistoryRespondDecisionTaskCompletedScope tracks RespondDecisionTaskCompleted API calls received by service @@ -1356,7 +1362,7 @@ const ( // -- Operation scopes for Matching service -- const ( // PollForDecisionTaskScope tracks PollForDecisionTask API calls received by service - MatchingPollForDecisionTaskScope = iota + NumCommonScopes + MatchingPollForDecisionTaskScope = iota + NumHistoryScopes // PollForActivityTaskScope tracks PollForActivityTask API calls received by service MatchingPollForActivityTaskScope // MatchingAddActivityTaskScope tracks AddActivityTask API calls received by service @@ -1392,7 +1398,7 @@ const ( // -- 
Operation scopes for Worker service -- const ( // ReplicationScope is the scope used by all metric emitted by replicator - ReplicatorScope = iota + NumCommonScopes + ReplicatorScope = iota + NumMatchingScopes // DomainReplicationTaskScope is the scope used by domain task replication processing DomainReplicationTaskScope // ESProcessorScope is scope used by all metric emitted by esProcessor @@ -1440,7 +1446,7 @@ const ( // -- Operation scopes for ShardDistributor service -- const ( // ShardDistributorGetShardOwnerScope tracks GetShardOwner API calls received by service - ShardDistributorGetShardOwnerScope = iota + NumCommonScopes + ShardDistributorGetShardOwnerScope = iota + NumWorkerScopes ShardDistributorHeartbeatScope ShardDistributorAssignLoopScope @@ -2398,6 +2404,8 @@ const ( ActiveClusterManagerLookupFailureCount ActiveClusterManagerLookupLatency + RingResolverError + NumCommonMetrics // Needs to be last on this list for iota numbering ) @@ -2414,6 +2422,7 @@ const ( TaskBatchCompleteCounter TaskBatchCompleteFailure TaskProcessingLatency + ExponentialTaskProcessingLatency TaskQueueLatency ScheduleToStartHistoryQueueLatencyPerTaskList TaskRequestsOldScheduler @@ -2676,6 +2685,7 @@ const ( ReplicationTaskCleanupCount ReplicationTaskCleanupFailure ReplicationTaskLatency + ExponentialReplicationTaskLatency MutableStateChecksumMismatch MutableStateChecksumInvalidated FailoverMarkerCount @@ -2700,12 +2710,13 @@ const ( VirtualQueueCountGauge VirtualQueuePausedGauge VirtualQueueRunningGauge + NumHistoryMetrics ) // Matching metrics enum const ( - PollSuccessPerTaskListCounter = iota + NumCommonMetrics + PollSuccessPerTaskListCounter = iota + NumHistoryMetrics PollTimeoutPerTaskListCounter PollSuccessWithSyncPerTaskListCounter LeaseRequestPerTaskListCounter @@ -2783,12 +2794,13 @@ const ( IsolationGroupUpscale IsolationGroupDownscale PartitionDrained + NumMatchingMetrics ) // Worker metrics enum const ( - ReplicatorMessages = iota + NumCommonMetrics + 
ReplicatorMessages = iota + NumMatchingMetrics ReplicatorFailures ReplicatorMessagesDropped ReplicatorLatency @@ -2872,12 +2884,13 @@ const ( DiagnosticsWorkflowStartedCount DiagnosticsWorkflowSuccess DiagnosticsWorkflowExecutionLatency + NumWorkerMetrics ) // ShardDistributor metrics enum const ( - ShardDistributorRequests = iota + NumCommonMetrics + ShardDistributorRequests = iota + NumWorkerMetrics ShardDistributorFailures ShardDistributorLatency ShardDistributorErrContextTimeoutCounter @@ -3178,18 +3191,21 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{ ActiveClusterManagerLookupSuccessCount: {metricName: "active_cluster_manager_lookup_success_count", metricType: Counter}, ActiveClusterManagerLookupFailureCount: {metricName: "active_cluster_manager_lookup_failure_count", metricType: Counter}, ActiveClusterManagerLookupLatency: {metricName: "active_cluster_manager_lookup_latency", metricType: Histogram, buckets: ExponentialDurationBuckets}, + + RingResolverError: {metricName: "ring_resolver_error", metricType: Counter}, }, History: { - TaskRequests: {metricName: "task_requests", metricType: Counter}, - TaskLatency: {metricName: "task_latency", metricType: Timer}, - TaskAttemptTimer: {metricName: "task_attempt", metricType: Timer}, - TaskFailures: {metricName: "task_errors", metricType: Counter}, - TaskDiscarded: {metricName: "task_errors_discarded", metricType: Counter}, - TaskStandbyRetryCounter: {metricName: "task_errors_standby_retry_counter", metricType: Counter}, - TaskNotActiveCounter: {metricName: "task_errors_not_active_counter", metricType: Counter}, - TaskLimitExceededCounter: {metricName: "task_errors_limit_exceeded_counter", metricType: Counter}, - TaskProcessingLatency: {metricName: "task_latency_processing", metricType: Timer}, - TaskQueueLatency: {metricName: "task_latency_queue", metricType: Timer}, + TaskRequests: {metricName: "task_requests", metricType: Counter}, + TaskLatency: {metricName: "task_latency", metricType: Timer}, + 
TaskAttemptTimer: {metricName: "task_attempt", metricType: Timer}, + TaskFailures: {metricName: "task_errors", metricType: Counter}, + TaskDiscarded: {metricName: "task_errors_discarded", metricType: Counter}, + TaskStandbyRetryCounter: {metricName: "task_errors_standby_retry_counter", metricType: Counter}, + TaskNotActiveCounter: {metricName: "task_errors_not_active_counter", metricType: Counter}, + TaskLimitExceededCounter: {metricName: "task_errors_limit_exceeded_counter", metricType: Counter}, + TaskProcessingLatency: {metricName: "task_latency_processing", metricType: Timer}, + ExponentialTaskProcessingLatency: {metricName: "task_latency_processing_ns", metricType: Histogram, exponentialBuckets: Low1ms10s}, + TaskQueueLatency: {metricName: "task_latency_queue", metricType: Timer}, ScheduleToStartHistoryQueueLatencyPerTaskList: {metricName: "schedule_to_start_history_queue_latency_per_tl", metricType: Timer}, TaskRequestsOldScheduler: {metricName: "task_requests_old_scheduler", metricType: Counter}, TaskRequestsNewScheduler: {metricName: "task_requests_new_scheduler", metricType: Counter}, @@ -3444,6 +3460,7 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{ ReplicationTaskCleanupCount: {metricName: "replication_task_cleanup_count", metricType: Counter}, ReplicationTaskCleanupFailure: {metricName: "replication_task_cleanup_failed", metricType: Counter}, ReplicationTaskLatency: {metricName: "replication_task_latency", metricType: Timer}, + ExponentialReplicationTaskLatency: {metricName: "replication_task_latency_ns", metricType: Histogram, exponentialBuckets: Mid1ms24h}, MutableStateChecksumMismatch: {metricName: "mutable_state_checksum_mismatch", metricType: Counter}, MutableStateChecksumInvalidated: {metricName: "mutable_state_checksum_invalidated", metricType: Counter}, FailoverMarkerCount: {metricName: "failover_marker_count", metricType: Counter}, diff --git a/common/metrics/defs_test.go b/common/metrics/defs_test.go index 122aa359645..db6c15a96a3 
100644 --- a/common/metrics/defs_test.go +++ b/common/metrics/defs_test.go @@ -129,6 +129,39 @@ func TestMetricDefs(t *testing.T) { } } +// "index -> operation" must be unique for structured.DynamicOperationTags' int lookup to work consistently. +// Duplicate indexes with the same operation name are technically fine, but there doesn't seem to be any benefit in allowing it, +// and it trivially ensures that all indexes have only one operation name. +func TestOperationIndexesAreUnique(t *testing.T) { + seen := make(map[int]bool) + for serviceIdx, serviceOps := range ScopeDefs { + for idx := range serviceOps { + if seen[idx] { + t.Error("duplicate operation index:", idx, "with name:", serviceOps[idx].operation, "in service:", serviceIdx) + } + seen[idx] = true + } + } +} + +func TestMetricsAreUnique(t *testing.T) { + // Duplicate indexes is arguably fine, but there doesn't seem to be any benefit in allowing it. + // + // Duplicate names are also linted, but they're done via an analyzer (metricslint) instead, to + // allow checking across multiple formats. + t.Run("indexes", func(t *testing.T) { + seen := make(map[int]bool) + for _, serviceMetrics := range MetricDefs { + for idx := range serviceMetrics { + if seen[idx] { + t.Error("duplicate metric index:", idx, "with name:", serviceMetrics[idx].metricName) + } + seen[idx] = true + } + } + }) +} + func TestExponentialDurationBuckets(t *testing.T) { factor := math.Pow(2, 0.25) assert.Equal(t, 80, len(ExponentialDurationBuckets)) diff --git a/common/metrics/histograms.go b/common/metrics/histograms.go new file mode 100644 index 00000000000..80b26ada4c8 --- /dev/null +++ b/common/metrics/histograms.go @@ -0,0 +1,236 @@ +package metrics + +import ( + "fmt" + "math" + "slices" + "time" + + "github.com/uber-go/tally" +) + +// Nearly all histograms should use pre-defined buckets here, to encourage consistency. 
+// Name them more by their intent than their exact ranges - if you need something outside the intent, +// make a new bucket. +// +// Extreme, rare cases should still create a new bucket here, not declare one in-line elsewhere. +// +// Partly this helps us document our ranges, and partly it ensures that any customized emitter +// can easily detect buckets with an `==` comparison, and can choose to use a different one +// if needed (e.g. to reduce scale if it is too costly to support). +var ( + // Default1ms10m is our "default" set of buckets, targeting 1ms through 100s, + // and is "rounded up" slightly to reach 80 buckets == 16 minutes (100s needs 68 buckets), + // plus multi-minute exceptions are common enough to support for the small additional cost. + // + // If you need sub-millisecond precision, or substantially longer durations than about 1 minute, + // consider using a different histogram. + Default1ms10m = makeSubsettableHistogram(time.Millisecond, 2, func(last time.Duration, length int) bool { + return last >= 10*time.Minute && length == 80 + }) + + // High1ms24h covers things like activity latency, where "longer than 1 day" is not + // particularly worth being precise about. + // + // This uses a lot of buckets, so it must be used with relatively-low cardinality elsewhere, + // and/or consider dual emitting (Mid1ms24h or lower) so overviews can be queried efficiently. + High1ms24h = makeSubsettableHistogram(time.Millisecond, 2, func(last time.Duration, length int) bool { + // 24h leads to 108 buckets, raise to 112 for cleaner subsetting + return last >= 24*time.Hour && length == 112 + }) + + // Mid1ms24h is a one-scale-lower version of High1ms24h, + // for use when we know it's too detailed to be worth emitting. 
+ // + // This uses 57 buckets, half of High1ms24h's 113 + Mid1ms24h = High1ms24h.subsetTo(1) + + // Low1ms10s is for things that are usually very fast, like most individual database calls, + // and is MUCH lower cardinality than Default1ms10m so it's more suitable for things like per-shard metrics. + Low1ms10s = makeSubsettableHistogram(time.Millisecond, 1, func(last time.Duration, length int) bool { + // 10s needs 26 buckets, raise to 32 for cleaner subsetting + return last >= 10*time.Second && length == 32 + }) + + // Mid1To32k is a histogram for small counters, like "how many replication tasks did we receive". + // This targets 1 to 32k + Mid1To32k = IntSubsettableHistogram(makeSubsettableHistogram(1, 2, func(last time.Duration, length int) bool { + // 10k needs 56 buckets, raise to 64 for cleaner subsetting + return last >= 10000 && length == 64 + })) +) + +// SubsettableHistogram is a duration-based histogram that can be subset to a lower scale +// in a standardized, predictable way. It is intentionally compatible with Prometheus and OTEL's +// "exponential histograms": https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram +// +// These histograms MUST always have a "_ns" suffix in their name to avoid confusion with timers. +type SubsettableHistogram struct { + tallyBuckets tally.DurationBuckets + + scale int +} + +// IntSubsettableHistogram is a non-duration-based integer-distribution histogram, otherwise identical +// to SubsettableHistogram but built as a separate type so you cannot pass the wrong one. +// +// These histograms MUST always have a "_counts" suffix in their name to avoid confusion with timers, +// or modify the Emitter to allow different suffixes if something else reads better. +type IntSubsettableHistogram SubsettableHistogram + +// currently we have no apparent need for float-histograms, +// as all our value ranges go from >=1 to many thousands, where +// decimal precision is pointless and mostly just looks bad. 
+// +// if we ever need 0..1 precision in histograms, we can add them then. +// type FloatSubsettableHistogram struct { +// tally.ValueBuckets +// scale int +// } + +func (s SubsettableHistogram) subsetTo(newScale int) SubsettableHistogram { + if newScale >= s.scale { + panic(fmt.Sprintf("scale %v is not less than the current scale %v", newScale, s.scale)) + } + if newScale < 0 { + panic(fmt.Sprintf("negative scales (%v == greater than *2 per step) are possible, but do not have tests yet", newScale)) + } + dup := SubsettableHistogram{ + tallyBuckets: slices.Clone(s.tallyBuckets), + scale: s.scale, + } + // remove every other bucket per -1 scale + for dup.scale > newScale { + if (len(dup.tallyBuckets)-1)%2 != 0 { + panic(fmt.Sprintf("cannot subset from scale %v to %v, %v-buckets is not divisible by 2", dup.scale, dup.scale-1, len(dup.tallyBuckets)-1)) + } + half := make(tally.DurationBuckets, 0, ((len(dup.tallyBuckets)-1)/2)+1) + half = append(half, dup.tallyBuckets[0]) // keep the zero value + for i := 1; i < len(dup.tallyBuckets); i += 2 { + half = append(half, dup.tallyBuckets[i]) // add first, third, etc + } + dup.tallyBuckets = half + dup.scale-- + } + return dup +} + +func (i IntSubsettableHistogram) subsetTo(newScale int) IntSubsettableHistogram { + return IntSubsettableHistogram(SubsettableHistogram(i).subsetTo(newScale)) +} + +// makeSubsettableHistogram is a replacement for tally.MustMakeExponentialDurationBuckets, +// tailored to make "construct a range" or "ensure N buckets" simpler for OTEL-compatible exponential histograms +// (i.e. https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram), +// ensures values are as precise as possible (preventing display-space-wasteful numbers like 3.99996ms), +// and that all histograms start with a 0 value (else erroneous negative values are impossible to notice). 
+// +// Start time must be positive, scale must be 0..3 (inclusive), and the loop MUST stop within 320 or fewer steps, +// to prevent extremely-costly histograms. +// Even 320 is far too many tbh, target 160 for very-high-value data, and generally around 80 or less (ideally a value +// that is divisible by 2 many times). +// +// The stop callback will be given the current largest value and the number of values generated so far. +// This excludes the zero value (you should ignore it anyway), so you can stop at your target value, +// or == a highly-divisible-by-2 number (do not go over). +// The buckets produced may be padded further to reach a "full" power-of-2 row, as this simplifies math elsewhere +// and costs very little compared to the rest of the histogram. +// +// For all values produced, please add a test to print the concrete values, and record the length and maximum value +// so they can be quickly checked when reading. +func makeSubsettableHistogram(start time.Duration, scale int, stop func(last time.Duration, length int) bool) SubsettableHistogram { + if start <= 0 { + panic(fmt.Sprintf("start must be greater than 0 or it will not grow exponentially, got %v", start)) + } + if scale < 0 || scale > 3 { + // anything outside this range is currently not expected and probably a mistake + panic(fmt.Sprintf("scale must be between 0 (grows by *2) and 3 (grows by *2^1/8), got scale: %v", scale)) + } + buckets := tally.DurationBuckets{ + time.Duration(0), // else "too low" and "negative" are impossible to tell apart. + // ^ note this must be excluded from calculations below, hence -1 everywhere. + } + for { + if len(buckets) > 320 { + panic(fmt.Sprintf("over 320 buckets is too many, choose a smaller range or smaller scale. "+ + "started at: %v, scale: %v, last value: %v", + start, scale, buckets[len(buckets)-1])) + } + buckets = append(buckets, nextBucket(start, len(buckets)-1, scale)) + + // stop when requested. 
+ if stop(buckets[len(buckets)-1], len(buckets)-1) { + break + } + } + + // fill in as many buckets as are necessary to make a full "row", i.e. just + // before the next power of 2 from the original value. + // this ensures subsetting keeps "round" numbers as long as possible. + powerOfTwoWidth := int(math.Pow(2, float64(scale))) // num of buckets needed to double a value + for (len(buckets)-1)%powerOfTwoWidth != 0 { + buckets = append(buckets, nextBucket(start, len(buckets)-1, scale)) + } + return SubsettableHistogram{ + tallyBuckets: buckets, + scale: scale, + } +} + +func nextBucket(start time.Duration, num int, scale int) time.Duration { + // calculating it from `start` each time reduces floating point error, ensuring "clean" multiples + // at every power of 2 (and possibly others), instead of e.g. "1ms ... 1.9999994ms" which occurs + // if you try to build from the previous value each time. + return time.Duration( + float64(start) * + math.Pow(2, float64(num)/math.Pow(2, float64(scale)))) +} + +func (s SubsettableHistogram) histScale() int { return s.scale } +func (s SubsettableHistogram) width() int { return int(math.Pow(2, float64(s.scale))) } +func (s SubsettableHistogram) len() int { return len(s.tallyBuckets) } +func (s SubsettableHistogram) start() time.Duration { return s.tallyBuckets[1] } +func (s SubsettableHistogram) end() time.Duration { return s.tallyBuckets[len(s.tallyBuckets)-1] } +func (s SubsettableHistogram) buckets() tally.DurationBuckets { return s.tallyBuckets } +func (s SubsettableHistogram) print(to func(string, ...any)) { + to("%v\n", s.tallyBuckets[0:1]) // zero value on its own row + for rowStart := 1; rowStart < s.len(); rowStart += s.width() { + to("%v\n", s.tallyBuckets[rowStart:rowStart+s.width()]) + } +} + +func (i IntSubsettableHistogram) histScale() int { return i.scale } +func (i IntSubsettableHistogram) width() int { return int(math.Pow(2, float64(i.scale))) } +func (i IntSubsettableHistogram) len() int { return 
len(i.tallyBuckets) } +func (i IntSubsettableHistogram) start() time.Duration { return i.tallyBuckets[1] } +func (i IntSubsettableHistogram) end() time.Duration { + return i.tallyBuckets[len(i.tallyBuckets)-1] +} +func (i IntSubsettableHistogram) buckets() tally.DurationBuckets { return i.tallyBuckets } +func (i IntSubsettableHistogram) print(to func(string, ...any)) { + // fairly unreadable as duration-strings, so convert to int by hand + to("[%d]\n", int(i.tallyBuckets[0])) // zero value on its own row + for rowStart := 1; rowStart < i.len(); rowStart += i.width() { + ints := make([]int, 0, i.width()) + for _, d := range i.tallyBuckets[rowStart : rowStart+i.width()] { + ints = append(ints, int(d)) + } + to("%v\n", ints) + } +} + +var _ histogrammy[SubsettableHistogram] = SubsettableHistogram{} +var _ histogrammy[IntSubsettableHistogram] = IntSubsettableHistogram{} + +// internal utility/test methods, but could be exposed if there's a use for it +type histogrammy[T any] interface { + histScale() int // exponential scale value. 0..3 inclusive. + width() int // number of values per power of 2 == how wide to print each row. 1, 2, 4, or 8. 
+ len() int // number of buckets + start() time.Duration // first non-zero bucket + end() time.Duration // last bucket + buckets() tally.DurationBuckets // access to all buckets + subsetTo(newScale int) T // generic so specific types can be returned + + print(to func(string, ...any)) // test-oriented printer +} diff --git a/common/metrics/histograms_test.go b/common/metrics/histograms_test.go new file mode 100644 index 00000000000..dddb12093f9 --- /dev/null +++ b/common/metrics/histograms_test.go @@ -0,0 +1,191 @@ +package metrics + +import ( + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHistogramValues(t *testing.T) { + t.Run("default_1ms_to_10m", func(t *testing.T) { + checkHistogram(t, Default1ms10m, 81, + ` +[0s] +[1ms 1.189207ms 1.414213ms 1.681792ms] +[2ms 2.378414ms 2.828427ms 3.363585ms] +[4ms 4.756828ms 5.656854ms 6.727171ms] +[8ms 9.513656ms 11.313708ms 13.454342ms] +[16ms 19.027313ms 22.627416ms 26.908685ms] +[32ms 38.054627ms 45.254833ms 53.81737ms] +[64ms 76.109255ms 90.509667ms 107.634741ms] +[128ms 152.21851ms 181.019335ms 215.269482ms] +[256ms 304.437021ms 362.038671ms 430.538964ms] +[512ms 608.874042ms 724.077343ms 861.077929ms] +[1.024s 1.217748085s 1.448154687s 1.722155858s] +[2.048s 2.435496171s 2.896309375s 3.444311716s] +[4.096s 4.870992343s 5.792618751s 6.888623433s] +[8.192s 9.741984686s 11.585237502s 13.777246867s] +[16.384s 19.483969372s 23.170475005s 27.554493735s] +[32.768s 38.967938744s 46.340950011s 55.10898747s] +[1m5.536s 1m17.935877488s 1m32.681900023s 1m50.21797494s] +[2m11.072s 2m35.871754977s 3m5.363800047s 3m40.43594988s] +[4m22.144s 5m11.743509955s 6m10.727600094s 7m20.87189976s] +[8m44.288s 10m23.48701991s 12m21.455200189s 14m41.743799521s] +`) + }) + t.Run("high_1ms_to_24h", func(t *testing.T) { + checkHistogram(t, High1ms24h, 113, + ` +[0s] +[1ms 1.189207ms 1.414213ms 1.681792ms] +[2ms 2.378414ms 2.828427ms 3.363585ms] +[4ms 4.756828ms 5.656854ms 6.727171ms] +[8ms 9.513656ms 
11.313708ms 13.454342ms] +[16ms 19.027313ms 22.627416ms 26.908685ms] +[32ms 38.054627ms 45.254833ms 53.81737ms] +[64ms 76.109255ms 90.509667ms 107.634741ms] +[128ms 152.21851ms 181.019335ms 215.269482ms] +[256ms 304.437021ms 362.038671ms 430.538964ms] +[512ms 608.874042ms 724.077343ms 861.077929ms] +[1.024s 1.217748085s 1.448154687s 1.722155858s] +[2.048s 2.435496171s 2.896309375s 3.444311716s] +[4.096s 4.870992343s 5.792618751s 6.888623433s] +[8.192s 9.741984686s 11.585237502s 13.777246867s] +[16.384s 19.483969372s 23.170475005s 27.554493735s] +[32.768s 38.967938744s 46.340950011s 55.10898747s] +[1m5.536s 1m17.935877488s 1m32.681900023s 1m50.21797494s] +[2m11.072s 2m35.871754977s 3m5.363800047s 3m40.43594988s] +[4m22.144s 5m11.743509955s 6m10.727600094s 7m20.87189976s] +[8m44.288s 10m23.48701991s 12m21.455200189s 14m41.743799521s] +[17m28.576s 20m46.974039821s 24m42.910400378s 29m23.487599042s] +[34m57.152s 41m33.948079642s 49m25.820800757s 58m46.975198084s] +[1h9m54.304s 1h23m7.896159284s 1h38m51.641601515s 1h57m33.950396168s] +[2h19m48.608s 2h46m15.792318568s 3h17m43.283203031s 3h55m7.900792337s] +[4h39m37.216s 5h32m31.584637137s 6h35m26.566406062s 7h50m15.801584674s] +[9h19m14.432s 11h5m3.169274274s 13h10m53.132812125s 15h40m31.603169349s] +[18h38m28.864s 22h10m6.338548549s 26h21m46.265624251s 31h21m3.206338698s] +[37h16m57.728s 44h20m12.677097099s 52h43m32.531248503s 62h42m6.412677396s] +`) + }) + t.Run("mid_1ms_24h", func(t *testing.T) { + checkHistogram(t, Mid1ms24h, 57, ` +[0s] +[1ms 1.414213ms] +[2ms 2.828427ms] +[4ms 5.656854ms] +[8ms 11.313708ms] +[16ms 22.627416ms] +[32ms 45.254833ms] +[64ms 90.509667ms] +[128ms 181.019335ms] +[256ms 362.038671ms] +[512ms 724.077343ms] +[1.024s 1.448154687s] +[2.048s 2.896309375s] +[4.096s 5.792618751s] +[8.192s 11.585237502s] +[16.384s 23.170475005s] +[32.768s 46.340950011s] +[1m5.536s 1m32.681900023s] +[2m11.072s 3m5.363800047s] +[4m22.144s 6m10.727600094s] +[8m44.288s 12m21.455200189s] +[17m28.576s 24m42.910400378s] 
+[34m57.152s 49m25.820800757s] +[1h9m54.304s 1h38m51.641601515s] +[2h19m48.608s 3h17m43.283203031s] +[4h39m37.216s 6h35m26.566406062s] +[9h19m14.432s 13h10m53.132812125s] +[18h38m28.864s 26h21m46.265624251s] +[37h16m57.728s 52h43m32.531248503s] +`) + }) + t.Run("mid_to_32k_ints", func(t *testing.T) { + // note: this histogram has some duplicates. + // + // this wastes a bit of memory, but Tally will choose the same index each + // time for a specific value, so it does not cause extra non-zero series + // to be stored or emitted. + // + // if this turns out to be expensive in Prometheus / etc, it's easy + // enough to start the histogram at 8, and dual-emit a non-exponential + // histogram for lower values. + checkHistogram(t, Mid1To32k, 65, ` +[0] +[1 1 1 1] +[2 2 2 3] +[4 4 5 6] +[8 9 11 13] +[16 19 22 26] +[32 38 45 53] +[64 76 90 107] +[128 152 181 215] +[256 304 362 430] +[512 608 724 861] +[1024 1217 1448 1722] +[2048 2435 2896 3444] +[4096 4870 5792 6888] +[8192 9741 11585 13777] +[16384 19483 23170 27554] +[32768 38967 46340 55108] +`) + }) + t.Run("low_cardinality_1ms_10s", func(t *testing.T) { + checkHistogram(t, Low1ms10s, 33, ` +[0s] +[1ms 1.414213ms] +[2ms 2.828427ms] +[4ms 5.656854ms] +[8ms 11.313708ms] +[16ms 22.627416ms] +[32ms 45.254833ms] +[64ms 90.509667ms] +[128ms 181.019335ms] +[256ms 362.038671ms] +[512ms 724.077343ms] +[1.024s 1.448154687s] +[2.048s 2.896309375s] +[4.096s 5.792618751s] +[8.192s 11.585237502s] +[16.384s 23.170475005s] +[32.768s 46.340950011s] +`) + }) +} + +// most histograms should pass this check, but fuzzy comparison is fine if needed for extreme cases. +func checkHistogram[T any](t *testing.T, h histogrammy[T], length int, expected string) { + var buf strings.Builder + h.print(func(s string, a ...any) { + str := fmt.Sprintf(s, a...) 
+ t.Logf("%s", str) + buf.WriteString(str) + } + if strings.TrimSpace(expected) != strings.TrimSpace(buf.String()) { + t.Error("histogram definition changed, update the test if this is intended") + } + + assert.Equal(t, length, h.len(), "wrong number of buckets") + + buckets := h.buckets() + assert.EqualValues(t, 0, buckets[0], "first bucket should always be zero") + for i := 1; i < len(buckets); i += h.width() { + if i > 1 { + // ensure good float math. + // + // this is *intentionally* doing exact comparisons, as floating point math with + // human-friendly integers have precise power-of-2 multiples for a very long time. + // + // note that the equivalent tally buckets, e.g.: + // tally.MustMakeExponentialDurationBuckets(time.Millisecond, math.Pow(2, 1.0/4.0)) + // fails this test, and the logs produced show ugly e.g. 31.999942ms values. + // tally also produces incorrect results if you start at e.g. 1, + // as it just produces endless `1` values. + assert.Equalf(t, buckets[i-h.width()]*2, buckets[i], + "current row's value (%v) is not a power of 2 greater than previous (%v), skewed / bad math?", + buckets[i], buckets[i-h.width()]) + } + } +} diff --git a/common/metrics/interfaces.go b/common/metrics/interfaces.go index f36993e5f4a..695565a5b38 100644 --- a/common/metrics/interfaces.go +++ b/common/metrics/interfaces.go @@ -65,6 +65,10 @@ type ( RecordHistogramDuration(timer int, d time.Duration) // RecordHistogramValue records a histogram value for the given metric name RecordHistogramValue(timer int, value float64) + // ExponentialHistogram records a subsettable exponential histogram value for the given metric name + ExponentialHistogram(hist int, d time.Duration) + // IntExponentialHistogram records a subsettable exponential histogram value for the given metric name + IntExponentialHistogram(hist int, value int) // UpdateGauge reports Gauge type absolute value metric UpdateGauge(gauge int, value float64) // Tagged return an internal scope that can be used to add
additional diff --git a/common/metrics/metricsfx/metricsfx.go b/common/metrics/metricsfx/metricsfx.go index 56702a26107..98aa7d10151 100644 --- a/common/metrics/metricsfx/metricsfx.go +++ b/common/metrics/metricsfx/metricsfx.go @@ -34,7 +34,8 @@ import ( // Module provides metrics client for fx application. var Module = fx.Module("metricsfx", - fx.Provide(buildClient)) + fx.Provide(buildClient), +) // ModuleForExternalScope provides metrics client for fx application when tally.Scope is created outside. var ModuleForExternalScope = fx.Module("metricsfx", diff --git a/common/metrics/mocks/Scope.go b/common/metrics/mocks/Scope.go index 8456725404c..e8f95750f22 100644 --- a/common/metrics/mocks/Scope.go +++ b/common/metrics/mocks/Scope.go @@ -54,6 +54,14 @@ func (_m *Scope) RecordHistogramValue(timer int, value float64) { _m.Called(timer, value) } +func (_m *Scope) ExponentialHistogram(hist int, d time.Duration) { + _m.Called(hist, d) +} + +func (_m *Scope) IntExponentialHistogram(hist int, value int) { + _m.Called(hist, value) +} + // RecordTimer provides a mock function with given fields: timer, d func (_m *Scope) RecordTimer(timer int, d time.Duration) { _m.Called(timer, d) diff --git a/common/metrics/nop.go b/common/metrics/nop.go index 5ccc9d15de5..fccbdb082ee 100644 --- a/common/metrics/nop.go +++ b/common/metrics/nop.go @@ -27,9 +27,9 @@ import ( ) var ( - NoopClient = &noopClientImpl{} - NoopScope = &noopScopeImpl{} - NoopStopwatch = tally.NewStopwatch(time.Now(), &nopStopwatchRecorder{}) + NoopClient Client = &noopClientImpl{} + NoopScope Scope = &noopScopeImpl{} + NoopStopwatch = tally.NewStopwatch(time.Now(), &nopStopwatchRecorder{}) ) type nopStopwatchRecorder struct{} @@ -74,6 +74,10 @@ func NewNoopMetricsClient() Client { type noopScopeImpl struct{} +func (n *noopScopeImpl) ExponentialHistogram(hist int, d time.Duration) {} + +func (n *noopScopeImpl) IntExponentialHistogram(hist int, value int) {} + func (n *noopScopeImpl) IncCounter(counter int) { } diff 
--git a/common/metrics/scope.go b/common/metrics/scope.go
index 13e1a351795..4002651a63b 100644
--- a/common/metrics/scope.go
+++ b/common/metrics/scope.go
@@ -21,6 +21,7 @@ package metrics
 
 import (
+	"strconv"
 	"time"
 
 	"github.com/uber-go/tally"
@@ -110,6 +111,28 @@ func (m *metricsScope) RecordHistogramValue(id int, value float64) {
 	}
 }
 
+func (m *metricsScope) ExponentialHistogram(id int, value time.Duration) {
+	def := m.defs[id]
+	m.scope.Tagged(
+		map[string]string{
+			"histogram_start": def.exponentialBuckets.start().String(),
+			"histogram_end":   def.exponentialBuckets.end().String(),
+			"histogram_scale": strconv.Itoa(def.exponentialBuckets.histScale()),
+		},
+	).Histogram(def.metricName.String(), def.exponentialBuckets.buckets()).RecordDuration(value)
+}
+
+func (m *metricsScope) IntExponentialHistogram(id int, value int) {
+	def := m.defs[id]
+	m.scope.Tagged(
+		map[string]string{
+			"histogram_start": def.intExponentialBuckets.start().String(),
+			"histogram_end":   def.intExponentialBuckets.end().String(),
+			"histogram_scale": strconv.Itoa(def.intExponentialBuckets.histScale()),
+		},
+	).Histogram(def.metricName.String(), def.intExponentialBuckets.buckets()).RecordValue(float64(value))
+}
+
 func (m *metricsScope) Tagged(tags ...Tag) Scope {
 	domainTagged := m.isDomainTagged
 	tagMap := make(map[string]string, len(tags))
diff --git a/internal/tools/metricslint/analyzer.go b/internal/tools/metricslint/analyzer.go
new file mode 100644
index 00000000000..e5cb925140d
--- /dev/null
+++ b/internal/tools/metricslint/analyzer.go
@@ -0,0 +1,263 @@
+package metricslint
+
+import (
+	"go/ast"
+	"go/token"
+	"go/types"
+	"strings"
+
+	"golang.org/x/tools/go/analysis"
+	"golang.org/x/tools/go/analysis/passes/inspect"
+	"golang.org/x/tools/go/ast/inspector"
+)
+
+const (
+	scopePkg      = "github.com/uber/cadence/common/metrics"
+	scopeTypeName = scopePkg + ".Scope"
+)
+
+var scopeMethods = map[string]bool{
+	"ExponentialHistogram":    true,
+	"IntExponentialHistogram": true,
+
"IncCounter": true, + "AddCounter": true, + "UpdateGauge": true, + "StartTimer": true, + "RecordTimer": true, + "RecordHistogramDuration": true, + "RecordHistogramValue": true, +} + +// Analyzer reports all (inline string) metric names passed to Emitter methods or defined in metrics/defs.go. +// +// This is NOT intended to be used directly, it's just using the analysis framework to simplify type checking. +// The output needs to be checked for duplicates or error messages by a separate process (see cmd/main.go). +var Analyzer = &analysis.Analyzer{ + Name: "metricnames", + Doc: "finds metricDefinition defs and uses, and reports them so they can be post-processed for uniqueness", + Requires: []*analysis.Analyzer{inspect.Analyzer}, + Run: run, +} + +func run(pass *analysis.Pass) (interface{}, error) { + i := pass.ResultOf[inspect.Analyzer].(*inspector.Inspector) + + if pass.Pkg.Path() == scopePkg { + // report all hard-coded metricDefinition names, so we can make sure + // there are no duplicates between the integers and the const strings + // passed to the emitter. + // + // if you want a duplicate: no, you really don't. + // use a different name, and once verified just swap to the old name and + // delete the integer def. otherwise you risk double-counting events if + // both are emitted, and can't tell if your tags are wrong. + // + // if you're swapping calls in one step, also delete the old definition. + // + // if you really do have an exception, modify this linter to allow it. + reportMetricDefinitionNames(pass, i) + // and DO NOT return, in case there are emitter calls in this package too, + // though currently that would imply an import loop. 
+ } + + // print calls to metrics-emitting methods on Emitter, regardless of location + reportScopeCalls(pass, i) + + return nil, nil +} + +func reportMetricDefinitionNames(pass *analysis.Pass, i *inspector.Inspector) { + nodeFilter := []ast.Node{ + (*ast.CompositeLit)(nil), + } + i.Preorder(nodeFilter, func(n ast.Node) { + decl := n.(*ast.CompositeLit) + var metricNameField *ast.KeyValueExpr + var histogramType string + for _, el := range decl.Elts { + kv, ok := el.(*ast.KeyValueExpr) + if !ok { + return // not a struct initializer + } + ident, ok := kv.Key.(*ast.Ident) + if !ok { + return // dynamic key in a dict or something, ignore + } + switch ident.Name { + case "metricName": + metricNameField = kv // currently this is exclusively used by metricDefinition + case "exponentialHistogram": + if histogramType != "" { + pass.Reportf(kv.Pos(), "only one of exponentialHistogram or intExponentialHistogram can be set") + } + histogramType = "duration" + case "intExponentialHistogram": + if histogramType != "" { + pass.Reportf(kv.Pos(), "only one of exponentialHistogram or intExponentialHistogram can be set") + } + histogramType = "int" + } + } + if metricNameField == nil { + return + } + str, ok := getConstantString(metricNameField.Value) + if !ok { + pass.Reportf(metricNameField.Pos(), "metricDefinition{metricName: ...} value must be an inline string literal") + return + } + switch histogramType { + case "duration": + if !strings.HasSuffix(str, "_ns") { + pass.Reportf(metricNameField.Pos(), "exponential histogram metric names must end with _ns: %q", str) + return + } + case "int": + if !strings.HasSuffix(str, "_counts") { + pass.Reportf(metricNameField.Pos(), "int-exponential histogram metric names must end with _counts: %q", str) + return + } + } + pass.Reportf(metricNameField.Pos(), "success: %v", str) + }) +} + +func reportScopeCalls(pass *analysis.Pass, i *inspector.Inspector) { + nodeFilter := []ast.Node{ + (*ast.File)(nil), // to skip test files + 
(*ast.FuncDecl)(nil), // to skip test helper funcs + (*ast.CallExpr)(nil), + } + i.Nodes(nodeFilter, func(n ast.Node, push bool) (proceed bool) { + // always descend by default, in case the call contains a closure that emits metrics. + // this covers the sync.Once case in the testdata, for example. + proceed = true + if !push { + return // do nothing when ascending the ast tree + } + + // check for test files, ignore their content if found + file, ok := n.(*ast.File) + if ok { + filename := pass.Fset.Position(file.Pos()).Filename + if strings.HasSuffix(filename, "_test.go") { + return false // don't inspect test files + } + return + } + + // check for test helper funcs anywhere, ignore their content if found. + // these are identified by a *testing.T as their first arg. + if fn, ok := n.(*ast.FuncDecl); ok { + if len(fn.Type.Params.List) > 0 { + firstArgType := pass.TypesInfo.TypeOf(fn.Type.Params.List[0].Type) + asStr := types.TypeString(firstArgType, nil) // "full/path/to.Type" + if asStr == "*testing.T" { + return false // don't inspect test helpers + } + } + return + } + + call := n.(*ast.CallExpr) // only other possibility due to nodeFilter + + // check if this is a method call (receiver.Method() == X.Sel) + // for one of the names we care about (as an early / efficient check) + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok { + return + } + methodName := sel.Sel.Name + if !scopeMethods[methodName] { + return + } + + // check if the receiver type is one we care about + receiver := pass.TypesInfo.TypeOf(sel.X) + // pointer receivers are not allowed, complain if found. + // if we just ignore them here, we might miss a metric call with a duplicate name. + named, ok := receiver.(*types.Named) + if !ok { + if _, isPtr := receiver.(*types.Pointer); isPtr { + // ignore pointer use, e.g. 
this occurs on the impl of scope itself + return + } + // not named and not pointer == allowed by the type system, but should be impossible in a CallExpr + pass.Reportf(sel.Pos(), "anonymous receiver of a method call should be impossible?") + return + } + fullName := getPkgAndName(named) + if fullName != scopeTypeName { + return // apparently not the scope, it just has similarly-named methods. ignore it. + } + + // at this point we know that it's "a metrics method", make sure it's valid. + + if len(call.Args) == 0 { + // 0 args == not currently in use + pass.Reportf(call.Pos(), "method call %v looks like a metrics method, but has too few args", call) + return + } + + filename := pass.Fset.Position(call.Pos()).Filename + if strings.HasSuffix(filename, "common/util.go") { + // util function that checks key lengths + return + } else if strings.HasSuffix(filename, "service/worker/archiver/replay_metrics_client.go") { + // a mock, ignore the metric-ID variable use + return + } else if strings.HasSuffix(filename, "service/history/workflowcache/cache.go") { + // reasonable usage, internal+external passed in. maybe worth changing later. + return + } else if strings.HasSuffix(filename, "service/history/workflowcache/metrics.go") { + // reasonable usage, internal+external passed in. maybe worth changing later. + return + } + + // pull out the first arg's text and report it for duplicate-checking + nameArg := call.Args[0] + nameArgText := types.ExprString(nameArg) // reconstructs the expression, but should be fine + if !strings.HasPrefix(nameArgText, "metrics.") { + pass.Reportf(nameArg.Pos(), "first argument to %v must be a metrics constant, not %v", methodName, nameArgText) + return + } + + // valid call! + // + // "report" the data we've found so it can be checked with a separate process (see cmd/main.go). + // that will error if any non-"success" lines were found, else check that the list is unique. 
+ // + // tests may lead to the same package being processed more than once, but + // the reported lines for "path/to/file:line success: {name}" are still unique + // and that won't count as a duplicate. + pass.Reportf(call.Pos(), "success: %s", nameArgText) + return + }) +} + +func getPkgAndName(named *types.Named) (fullName string) { + obj := named.Obj() + if obj == nil { + // afaik not possible, this would be "a named type without a type" + return "" + } + pkg := obj.Pkg() + if pkg == nil { + // given where this is used, I believe this would currently imply + // "a builtin type with a method", which does not exist. + // but it's fine to return partial values, in case it's used in more places. + return obj.Name() + } + return pkg.Path() + "." + obj.Name() +} + +func getConstantString(expr ast.Expr) (str string, ok bool) { + lit, ok := expr.(*ast.BasicLit) + if !ok { + return "", false + } + if lit.Kind != token.STRING { + return "", false + } + return strings.Trim(lit.Value, `"`), true +} diff --git a/internal/tools/metricslint/analyzer_test.go b/internal/tools/metricslint/analyzer_test.go new file mode 100644 index 00000000000..22d1c9c1696 --- /dev/null +++ b/internal/tools/metricslint/analyzer_test.go @@ -0,0 +1,12 @@ +package metricslint + +import ( + "testing" + + "golang.org/x/tools/go/analysis/analysistest" +) + +func TestAnalyzer(t *testing.T) { + testdata := analysistest.TestData() + analysistest.Run(t, testdata, Analyzer, "github.com/uber/cadence/common/metrics") +} diff --git a/internal/tools/metricslint/cmd/main.go b/internal/tools/metricslint/cmd/main.go new file mode 100644 index 00000000000..9547c4f7516 --- /dev/null +++ b/internal/tools/metricslint/cmd/main.go @@ -0,0 +1,117 @@ +package main + +import ( + "flag" + "fmt" + "os" + "os/exec" + "slices" + "strconv" + "strings" + + "golang.org/x/tools/go/analysis/singlechecker" + + "github.com/uber/cadence/internal/tools/metricslint" +) + +func main() { + var analyze bool + flag.BoolVar(&analyze, 
"analyze", false, "if true, run the analyzer normally (e.g. for debugging). if false, run the analyzer internally and interpret the results as a linter") + skip := map[string]int{} + flag.Func("skip", "metric name to ignore, comma, followed by the number of times it is duplicated. repeat to skip more things.", func(s string) error { + parts := strings.SplitN(s, ",", 2) + if len(parts) != 2 { + return fmt.Errorf("skip argument %q is not in the form `name,count`", s) + } + count, err := strconv.Atoi(parts[1]) + if err != nil || count < 2 { + return fmt.Errorf("skip argument %q has invalid count %q: %v", s, parts[1], err) + } + skip[parts[0]] = count + return nil + }) + flag.Parse() + + if analyze { + singlechecker.Main(metricslint.Analyzer) + } else { + os.Exit(checkAnalyzerOutput(skip)) + } +} + +func checkAnalyzerOutput(skip map[string]int) (exitCode int) { + cmd := exec.Command(os.Args[0], append([]string{"-analyze"}, os.Args[1:]...)...) + out, _ := cmd.CombinedOutput() + + defer func() { + // in case of crashes, print the output we got so far as it likely narrows down the cause. 
+ if r := recover(); r != nil { + _, _ = fmt.Fprint(os.Stderr, "\n\n"+string(out)) + panic(r) + } + }() + + lines := strings.Split(string(out), "\n") + // map of metric name to set of lines using it (to deduplicate) + names := make(map[string]map[string]struct{}, len(lines)) + var failures []string + for _, line := range lines { + if line == "" { + continue // empty lines are fine + } + words := strings.Fields(line) + if len(words) == 3 && words[1] == "success:" { + prev, ok := names[words[2]] + if !ok { + prev = make(map[string]struct{}) + } + prev[line] = struct{}{} + names[words[2]] = prev + } else { + failures = append(failures, line) + } + } + + if len(failures) > 0 { + _, _ = fmt.Fprintln(os.Stderr, strings.Join(failures, "\n")+"\n") + return 1 + } + + exitCode = 0 + sortedNames := make([]string, 0, len(names)) + for name := range names { + sortedNames = append(sortedNames, name) + } + slices.Sort(sortedNames) + for _, name := range sortedNames { + usages := names[name] + var complain bool + expected, ok := skip[name] + if ok { + complain = len(usages) != expected // expectation mismatch + } else { + complain = len(usages) > 1 // must not have duplicates + } + if complain { + if expected > 0 { + _, _ = fmt.Fprintf(os.Stderr, "Metric name %q used in an unexpected number of places:\n", name) + } else { + _, _ = fmt.Fprintf(os.Stderr, "Metric name %q used in multiple places:\n", name) + } + // frustratingly: this can't use `maps.Keys` because it + // returns an iterator, not a slice, so it can't be sorted or `...`-spread. + // bleh. 
+			sorted := make([]string, 0, len(usages))
+			for usage := range usages {
+				sorted = append(sorted, usage)
+			}
+			slices.Sort(sorted)
+			for _, usage := range sorted {
+				_, _ = fmt.Fprintf(os.Stderr, "\t%s\n", usage)
+			}
+			exitCode = 1
+		}
+	}
+
+	return exitCode
+}
diff --git a/internal/tools/metricslint/testdata/go.mod b/internal/tools/metricslint/testdata/go.mod
new file mode 100644
index 00000000000..216898fe865
--- /dev/null
+++ b/internal/tools/metricslint/testdata/go.mod
@@ -0,0 +1,4 @@
+module github.com/uber/cadence/common/metrics
+// ^ must be the same package as the scope and metricDefinition's definition for the hardcoded path to work right
+
+go 1.24
\ No newline at end of file
diff --git a/internal/tools/metricslint/testdata/metrics.go b/internal/tools/metricslint/testdata/metrics.go
new file mode 100644
index 00000000000..02384bf0fce
--- /dev/null
+++ b/internal/tools/metricslint/testdata/metrics.go
@@ -0,0 +1,28 @@
+package metrics
+
+import (
+	"time"
+)
+
+type Scope interface {
+	IncCounter(counter int)
+	AddCounter(counter int, delta int64)
+	StartTimer(timer int) Stopwatch
+	RecordTimer(timer int, d time.Duration)
+	RecordHistogramDuration(timer int, d time.Duration)
+	RecordHistogramValue(timer int, value float64)
+	ExponentialHistogram(hist int, d time.Duration)
+	IntExponentialHistogram(hist int, value int)
+	UpdateGauge(gauge int, value float64)
+	Tagged(tags ...Tag) Scope
+}
+type Tag struct{}
+type Stopwatch interface{}
+
+func do() {
+	var scope Scope
+
+	scope.IncCounter(123)
+	scope.IncCounter(123)
+	scope.ExponentialHistogram(123, 0)
+}
diff --git a/internal/tools/metricslint/testdata/metrics_gen.go b/internal/tools/metricslint/testdata/metrics_gen.go
new file mode 100644
index 00000000000..647411c8d4a
--- /dev/null
+++ b/internal/tools/metricslint/testdata/metrics_gen.go
@@ -0,0 +1,11 @@
+package metrics
+
+// Code generated by test_shenanigans; DO NOT EDIT
+
+func GeneratedFunction() {
+	var scope Scope
+
+	scope.IncCounter(123)
+
scope.IncCounter(123) + scope.ExponentialHistogram(123, 0) +} diff --git a/internal/tools/metricslint/testdata/metrics_test.go b/internal/tools/metricslint/testdata/metrics_test.go new file mode 100644 index 00000000000..a367d366dd6 --- /dev/null +++ b/internal/tools/metricslint/testdata/metrics_test.go @@ -0,0 +1,11 @@ +package metrics + +func ignoredInTestFiles() { + var scope Scope + + // test code is allowed to break the rules because it does not run in production + + scope.IncCounter(123) + scope.IncCounter(123) + scope.ExponentialHistogram(123, 0) +} diff --git a/revive.toml b/revive.toml index 2238d4dd4c9..d1b939df7af 100644 --- a/revive.toml +++ b/revive.toml @@ -20,7 +20,7 @@ warningCode = 0 [rule.errorf] # [rule.exported] # disabled due to lack of value / encouraging bad habits [rule.if-return] -[rule.increment-decrement] +# [rule.increment-decrement] # noisy on generated code, otherwise seems fine - if there's a way to fix that, probably re-enable [rule.indent-error-flow] # Disabled because we have 158 packages that need package comments; we could instead add ignore # directives for existing packages and require it for new packages. @@ -62,7 +62,7 @@ arguments=[["loop","method-call","recover","return", "immediate-recover"]] [rule.range-val-address] # beneficial [rule.range-val-in-closure] # beneficial [rule.unconditional-recursion] # probably a good idea -[rule.unreachable-code] # code simplifier +# [rule.unreachable-code] # reasonable code simplifier, but does not match compiler behavior (e.g. 
log.Fatal still needs panic or return) [rule.waitgroup-by-value] # correct use of sync code, important #### unused utilities diff --git a/service/history/replication/task_processor.go b/service/history/replication/task_processor.go index 2e0c9416950..cc67eaaae42 100644 --- a/service/history/replication/task_processor.go +++ b/service/history/replication/task_processor.go @@ -477,6 +477,14 @@ func (p *taskProcessorImpl) processTaskOnce(replicationTask *types.ReplicationTa } mScope = mScope.Tagged(metrics.DomainTag(domainName)) } + // emit single task processing latency + mScope.ExponentialHistogram(metrics.ExponentialTaskProcessingLatency, now.Sub(startTime)) + // emit latency from task generated to task received + mScope.ExponentialHistogram( + metrics.ExponentialReplicationTaskLatency, + now.Sub(time.Unix(0, replicationTask.GetCreationTime())), + ) + // emit single task processing latency mScope.RecordTimer(metrics.TaskProcessingLatency, now.Sub(startTime)) // emit latency from task generated to task received diff --git a/service/worker/archiver/replay_metrics_client.go b/service/worker/archiver/replay_metrics_client.go index 6ec7e1c94ce..11c42ad3924 100644 --- a/service/worker/archiver/replay_metrics_client.go +++ b/service/worker/archiver/replay_metrics_client.go @@ -158,6 +158,20 @@ func (r *replayMetricsScope) RecordHistogramValue(timer int, value float64) { r.scope.RecordHistogramValue(timer, value) } +func (r *replayMetricsScope) ExponentialHistogram(hist int, d time.Duration) { + if workflow.IsReplaying(r.ctx) { + return + } + r.scope.ExponentialHistogram(hist, d) +} + +func (r *replayMetricsScope) IntExponentialHistogram(hist int, value int) { + if workflow.IsReplaying(r.ctx) { + return + } + r.scope.IntExponentialHistogram(hist, value) +} + // UpdateGauge reports Gauge type absolute value metric func (r *replayMetricsScope) UpdateGauge(gauge int, value float64) { if workflow.IsReplaying(r.ctx) {