cadence-workflow · Groxx · Sep 10, 2025 · Sep 4, 2025 · Sep 5, 2025 · Sep 9, 2025
@@ -37,10 +37,12 @@ type (
 
 	// metricDefinition contains the definition for a metric
 	metricDefinition struct {
-		metricType       MetricType    // metric type
-		metricName       MetricName    // metric name
-		metricRollupName MetricName    // optional. if non-empty, this name must be used for rolled-up version of this metric
-		buckets          tally.Buckets // buckets if we are emitting histograms
+		metricType            MetricType    // metric type
+		metricName            MetricName    // metric name
+		metricRollupName      MetricName    // optional. if non-empty, this name must be used for rolled-up version of this metric
+		buckets               tally.Buckets // buckets if we are emitting histograms
+		exponentialBuckets    histogrammy[SubsettableHistogram]
+		intExponentialBuckets histogrammy[IntSubsettableHistogram]
 	}
 
 	// scopeDefinition holds the tag definitions for a scope
@@ -2433,6 +2435,7 @@ const (
 	TaskBatchCompleteCounter
 	TaskBatchCompleteFailure
 	TaskProcessingLatency
+	ExponentialTaskProcessingLatency
 	TaskQueueLatency
 	ScheduleToStartHistoryQueueLatencyPerTaskList
 	TaskRequestsOldScheduler
@@ -2697,6 +2700,7 @@ const (
 	ReplicationTaskCleanupCount
 	ReplicationTaskCleanupFailure
 	ReplicationTaskLatency
+	ExponentialReplicationTaskLatency
 	MutableStateChecksumMismatch
 	MutableStateChecksumInvalidated
 	FailoverMarkerCount
@@ -3208,16 +3212,17 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{
 		RingResolverError: {metricName: "ring_resolver_error", metricType: Counter},
 	},
 	History: {
-		TaskRequests:             {metricName: "task_requests", metricType: Counter},
-		TaskLatency:              {metricName: "task_latency", metricType: Timer},
-		TaskAttemptTimer:         {metricName: "task_attempt", metricType: Timer},
-		TaskFailures:             {metricName: "task_errors", metricType: Counter},
-		TaskDiscarded:            {metricName: "task_errors_discarded", metricType: Counter},
-		TaskStandbyRetryCounter:  {metricName: "task_errors_standby_retry_counter", metricType: Counter},
-		TaskNotActiveCounter:     {metricName: "task_errors_not_active_counter", metricType: Counter},
-		TaskLimitExceededCounter: {metricName: "task_errors_limit_exceeded_counter", metricType: Counter},
-		TaskProcessingLatency:    {metricName: "task_latency_processing", metricType: Timer},
-		TaskQueueLatency:         {metricName: "task_latency_queue", metricType: Timer},
+		TaskRequests:                     {metricName: "task_requests", metricType: Counter},
+		TaskLatency:                      {metricName: "task_latency", metricType: Timer},
+		TaskAttemptTimer:                 {metricName: "task_attempt", metricType: Timer},
+		TaskFailures:                     {metricName: "task_errors", metricType: Counter},
+		TaskDiscarded:                    {metricName: "task_errors_discarded", metricType: Counter},
+		TaskStandbyRetryCounter:          {metricName: "task_errors_standby_retry_counter", metricType: Counter},
+		TaskNotActiveCounter:             {metricName: "task_errors_not_active_counter", metricType: Counter},
+		TaskLimitExceededCounter:         {metricName: "task_errors_limit_exceeded_counter", metricType: Counter},
+		TaskProcessingLatency:            {metricName: "task_latency_processing", metricType: Timer},
+		ExponentialTaskProcessingLatency: {metricName: "task_latency_processing_ns", metricType: Histogram, exponentialBuckets: Low1ms100s},
+		TaskQueueLatency:                 {metricName: "task_latency_queue", metricType: Timer},
 		ScheduleToStartHistoryQueueLatencyPerTaskList: {metricName: "schedule_to_start_history_queue_latency_per_tl", metricType: Timer},
 		TaskRequestsOldScheduler:                      {metricName: "task_requests_old_scheduler", metricType: Counter},
 		TaskRequestsNewScheduler:                      {metricName: "task_requests_new_scheduler", metricType: Counter},
@@ -3474,6 +3479,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{
 		ReplicationTaskCleanupCount:                                  {metricName: "replication_task_cleanup_count", metricType: Counter},
 		ReplicationTaskCleanupFailure:                                {metricName: "replication_task_cleanup_failed", metricType: Counter},
 		ReplicationTaskLatency:                                       {metricName: "replication_task_latency", metricType: Timer},
+		ExponentialReplicationTaskLatency:                            {metricName: "replication_task_latency_ns", metricType: Histogram, exponentialBuckets: Mid1ms24h},
 		MutableStateChecksumMismatch:                                 {metricName: "mutable_state_checksum_mismatch", metricType: Counter},
 		MutableStateChecksumInvalidated:                              {metricName: "mutable_state_checksum_invalidated", metricType: Counter},
 		FailoverMarkerCount:                                          {metricName: "failover_marker_count", metricType: Counter},

@@ -24,6 +24,7 @@ import (
 	"fmt"
 	"math"
 	"regexp"
+	"strings"
 	"testing"
 	"time"
 
@@ -187,6 +188,23 @@ func TestMetricsAreUnique(t *testing.T) {
 	})
 }
 
+func TestHistogramSuffixes(t *testing.T) {
+	for _, serviceMetrics := range MetricDefs {
+		for _, def := range serviceMetrics {
+			if def.intExponentialBuckets != nil {
+				if !strings.HasSuffix(def.metricName.String(), "_counts") {
+					t.Errorf("int-exponential-histogram metric %q should have suffix \"_counts\"", def.metricName.String())
+				}
+			}
+			if def.exponentialBuckets != nil {
+				if !strings.HasSuffix(def.metricName.String(), "_ns") {
+					t.Errorf("exponential-histogram metric %q should have suffix \"_ns\"", def.metricName.String())
+				}
+			}
+		}
+	}
+}
+
 func TestExponentialDurationBuckets(t *testing.T) {
 	factor := math.Pow(2, 0.25)
 	assert.Equal(t, 80, len(ExponentialDurationBuckets))

@@ -0,0 +1,293 @@
+package metrics
+
+import (
+	"fmt"
+	"math"
+	"slices"
+	"strconv"
+	"time"
+
+	"github.com/uber-go/tally"
+)
+
+// Nearly all histograms should use pre-defined buckets here, to encourage consistency.
+//
+// When creating a new one, make sure to read makeSubsettableHistogram for context, and choose
+// values that communicate your *intent*, not the actual final value.  Add a test to show the
+// resulting buckets, so reviewers can see just how much / how little you chose.
+//
+// Similarly, name them more by their intent than their exact ranges, because "it has just barely
+// enough buckets" is almost always not *actually* enough when things misbehave.
+// Choosing by intent also helps make adjusting these values safer if needed (e.g. subsetting or
+// reducing the range), because we can be sure all intents match.
+var (
+	// Default1ms100s is our "default" set of buckets, targeting at least 1ms through 100s,
+	// and is "rounded up" slightly to reach 80 buckets (100s needs 68 buckets),
+	// plus multi-minute exceptions are common enough to support for the small additional cost
+	// (this goes up to ~15m).
+	//
+	// That makes this *relatively* costly, but hopefully good enough for most metrics without
+	// needing any careful thought.  If this proves incorrect, it will be down-scaled without
+	// checking existing uses, so make sure to use something else if you require a certain level
+	// of precision.
+	//
+	// If you need sub-millisecond precision, or substantially longer durations than about 1 minute,
+	// consider using a different histogram.
+	Default1ms100s = makeSubsettableHistogram(2, time.Millisecond, 100*time.Second, 80)
+
+	// Low1ms100s is a half-resolution version of Default1ms100s, intended for use any time the default
+	// is expected to be more costly than we feel comfortable with.  Or for automatic down-scaling
+	// at runtime via dynamic config, if that's ever built.
+	//
+	// This uses only 40 buckets, so it's likely good enough for most purposes, e.g. database operations
+	// that might have high cardinality but should not exceed about a minute even in exceptional cases.
+	// Reducing this to "1ms to 10s" seems reasonable for some cases, but that still needs ~32 buckets,
+	// which isn't a big savings and reduces our ability to notice abnormally slow events.
+	Low1ms100s = Default1ms100s.subsetTo(1)
+
+	// High1ms24h covers things like activity latency, where ranges are very large but
+	// "longer than 1 day" is not particularly worth being precise about.
+	//
+	// This uses a lot of buckets, so it must be used with relatively-low cardinality elsewhere,
+	// and/or consider dual emitting (Mid1ms24h or lower) so overviews can be queried efficiently.
+	High1ms24h = makeSubsettableHistogram(2, time.Millisecond, 24*time.Hour, 112)
+
+	// Mid1ms24h is a one-scale-lower version of High1ms24h,
+	// for use when we know it's too detailed to be worth emitting.
+	//
+	// This uses 56 buckets, half of High1ms24h's 112
+	Mid1ms24h = High1ms24h.subsetTo(1)
+
+	// Mid1To16k is a histogram for small counters, like "how many replication tasks did we receive".
+	//
+	// This targets 1 to ~10k with some buffer, and ends just below 64k.
+	// Do not rely on this for values which can ever reach 64k, make a new histogram.
+	Mid1To16k = IntSubsettableHistogram(makeSubsettableHistogram(2, 1, 16384, 64))
+)
+
+// SubsettableHistogram is a duration-based histogram that can be subset to a lower scale
+// in a standardized, predictable way.  It is intentionally compatible with Prometheus and OTEL's
+// "exponential histograms": https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram
+//
+// These histograms MUST always have a "_ns" suffix in their name to avoid confusion with timers.
+type SubsettableHistogram struct {
+	tallyBuckets tally.DurationBuckets
+
+	scale int
+}
+
+// IntSubsettableHistogram is a non-duration-based integer-distribution histogram, otherwise identical
+// to SubsettableHistogram but built as a separate type so you cannot pass the wrong one.
+//
+// These histograms MUST always have a "_counts" suffix in their name to avoid confusion with timers.
+// (more suffixes will likely be allowed as needed)
+type IntSubsettableHistogram SubsettableHistogram
+
+// currently we have no apparent need for float-histograms,
+// as all our value ranges go from >=1 to many thousands, where
+// decimal precision is pointless and mostly just looks bad.
+//
+// if we ever need 0..1 precision in histograms, we can add them then.
+// 	type FloatSubsettableHistogram struct {
+// 		tally.ValueBuckets
+// 		scale int
+// 	}
+
+// subsetTo takes an existing histogram and reduces its detail level.
+//
+// this can be replaced by simply creating a new histogram with the same args
+// as the original but half the length, but it's added because: 1) it works too,
+// and 2) to document how the process works, because we'll likely be doing this
+// at query time to reduce the level of detail in small / over-crowded graphs.
+func (s SubsettableHistogram) subsetTo(newScale int) SubsettableHistogram {
+	if newScale >= s.scale {
+		panic(fmt.Sprintf("scale %v is not less than the current scale %v", newScale, s.scale))
+	}
+	if newScale < 0 {
+		panic(fmt.Sprintf("negative scales (%v == greater than *2 per step) are possible, but do not have tests yet", newScale))
+	}
+	dup := SubsettableHistogram{
+		tallyBuckets: slices.Clone(s.tallyBuckets),
+		scale:        s.scale,
+	}
+	// compress every other bucket per -1 scale
+	for dup.scale > newScale {
+		if (len(dup.tallyBuckets)-1)%2 != 0 {
+			panic(fmt.Sprintf("cannot subset from scale %v to %v, %v-buckets is not divisible by 2", dup.scale, dup.scale-1, len(dup.tallyBuckets)-1))
+		}
+		half := make(tally.DurationBuckets, 0, ((len(dup.tallyBuckets)-1)/2)+1)
+		half = append(half, dup.tallyBuckets[0]) // keep the zero value intact
+		for i := 1; i < len(dup.tallyBuckets); i += 2 {
+			half = append(half, dup.tallyBuckets[i]) // compress the first, third, etc
+		}
+		dup.tallyBuckets = half
+		dup.scale--
+	}
+	return dup
+}
+
+func (i IntSubsettableHistogram) subsetTo(newScale int) IntSubsettableHistogram {
+	return IntSubsettableHistogram(SubsettableHistogram(i).subsetTo(newScale))
+}
+
+func (s SubsettableHistogram) tags() map[string]string {
+	return map[string]string{
+		// use the duration string func, as "1ms" duration suffix is clearer than "1000000" for nanoseconds.
+		// this also helps differentiate "tracks time" from "may be a general value distribution",
+		// both visually when querying by hand and for any future automation (if needed).
+		"histogram_start": s.start().String(),
+		"histogram_end":   s.end().String(),
+		"histogram_scale": strconv.Itoa(s.scale),
+	}
+}
+
+func (i IntSubsettableHistogram) tags() map[string]string {
+	return map[string]string{
+		"histogram_start": strconv.Itoa(int(i.start())),
+		"histogram_end":   strconv.Itoa(int(i.end())),
+		"histogram_scale": strconv.Itoa(i.scale),
+	}
+}
+
+// makeSubsettableHistogram is a replacement for tally.MustMakeExponentialDurationBuckets,
+// tailored to make "construct a range" or "ensure N buckets" simpler for OTEL-compatible exponential histograms
+// (i.e. https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram).
+//
+// It ensures values are as precise as possible (preventing display-space-wasteful numbers like 3.99996ms),
+// that all histograms start with a 0 value (else erroneous negative values are impossible to notice),
+// and avoids some bugs with low values (tally.MustMakeExponentialDurationBuckets misbehaves for small numbers,
+// causing all buckets to == the minimum value).
+//
+// # To use
+//
+// Choose scale/start/end values that match your *intent*, i.e. the range you want to capture, and then choose
+// a length that EXCEEDS that by at least 2x, ideally 4x-10x, and ends at a highly-divisible-by-2 value.
+//
+// The exact length / exceeding / etc does not matter (just write a test so it's documented and can be reviewed),
+// it just serves to document your intent and to make sure that we have some head-room so we can understand how
+// wrongly we guessed at our needs if it's exceeded during an incident of some kind.  We've failed at this
+// multiple times in the past, it's worth paying a bit extra to be able to diagnose problems.
+//
+// For all histograms produced, add a test to print the concrete values, so they can be quickly checked when reading.
+func makeSubsettableHistogram(scale int, start, end time.Duration, length int) SubsettableHistogram {
+	if scale < 0 || scale > 3 {
+		// anything outside this range is currently not expected and probably a mistake,
+		// but any value is technically sound.
+		panic(fmt.Sprintf("scale must be between 0 (grows by *2) and 3 (grows by *2^1/8), got scale: %v", scale))
+	}
+	if start <= 0 {
+		panic(fmt.Sprintf("start must be greater than 0 or it will not grow exponentially, got %v", start))
+	}
+	if start >= end {
+		panic(fmt.Sprintf("start must be less than end (%v < %v)", start, end))
+	}
+	if length < 12 || length > 160 {
+		// 160 is probably higher than we should consider, but going further is currently risking much too high costs.
+		// if this changes, e.g. due to metrics optimizations, just adjust this validation.
+		//
+		// 12 is pretty arbitrary, I just don't expect us to ever make a histogram that small, so it's probably
+		// from accidentally passing scale or something to the wrong argument.
+		panic(fmt.Sprintf("length must be between 12 < %d <=160", length))
+	}
+	// make sure the number of buckets completes a "full" row,
+	// i.e. the next bucket would be a power of 2 of the start value.
+	//
+	// for a more visual example of what this means, see the logged strings in tests:
+	// each "row" of values must be the same width.
+	//
+	// adding a couple buckets to ensure this is met costs very little, and ensures
+	// subsetting combines values more consistently (not crossing rows) for longer.
+	powerOfTwoWidth := int(math.Pow(2, float64(scale))) // num of buckets needed to double a value
+	missing := length % powerOfTwoWidth
+	if missing != 0 {
+		panic(fmt.Sprintf(`number of buckets must "fill" a power of 2 to end at a consistent row width.  `+
+			`got %d, probably raise to %d`,
+			length, length+missing))
+	}
+
+	var buckets tally.DurationBuckets
+	for i := 0; i < length; i++ {
+		buckets = append(buckets, nextBucket(start, len(buckets), scale))
+	}
+
+	if last(buckets) <= end*2 {
+		panic(fmt.Sprintf("not enough buckets (%d) to exceed the end target (%v) by at least 2x. "+
+			"you are STRONGLY encouraged to include ~2x-10x more range than your intended end value, "+
+			"because we almost always underestimate the ranges needed during incidents",
+			length, last(buckets)),
+		)
+	}
+
+	return SubsettableHistogram{
+		tallyBuckets: append(
+			// always include a zero value at the beginning, so negative values are noticeable ("-inf to 0" bucket)
+			tally.DurationBuckets{0},
+			buckets...,
+		),
+		scale: scale,
+	}
+}
+
+// last-item-in-slice helper to eliminate some magic `-1`s
+func last[T any, X ~[]T](s X) T {
+	return s[len(s)-1]
+}
+
+func nextBucket(start time.Duration, num int, scale int) time.Duration {
+	// calculating it from `start` each time reduces floating point error, ensuring "clean" multiples
+	// at every power of 2 (and possibly others), instead of e.g. "1ms ... 1.9999994ms" which occurs
+	// if you try to build from the previous value each time.
+	return time.Duration(
+		float64(start) *
+			math.Pow(2, float64(num)/math.Pow(2, float64(scale))))
+}
+
+func (s SubsettableHistogram) histScale() int                 { return s.scale }
+func (s SubsettableHistogram) width() int                     { return int(math.Pow(2, float64(s.scale))) }
+func (s SubsettableHistogram) len() int                       { return len(s.tallyBuckets) }
+func (s SubsettableHistogram) start() time.Duration           { return s.tallyBuckets[1] }
+func (s SubsettableHistogram) end() time.Duration             { return s.tallyBuckets[len(s.tallyBuckets)-1] }
+func (s SubsettableHistogram) buckets() tally.DurationBuckets { return s.tallyBuckets }
+func (s SubsettableHistogram) print(to func(string, ...any)) {
+	to("%v\n", s.tallyBuckets[0:1]) // zero value on its own row
+	for rowStart := 1; rowStart < s.len(); rowStart += s.width() {
+		to("%v\n", s.tallyBuckets[rowStart:rowStart+s.width()])
+	}
+}
+
+func (i IntSubsettableHistogram) histScale() int       { return i.scale }
+func (i IntSubsettableHistogram) width() int           { return int(math.Pow(2, float64(i.scale))) }
+func (i IntSubsettableHistogram) len() int             { return len(i.tallyBuckets) }
+func (i IntSubsettableHistogram) start() time.Duration { return i.tallyBuckets[1] }
+func (i IntSubsettableHistogram) end() time.Duration {
+	return i.tallyBuckets[len(i.tallyBuckets)-1]
+}
+func (i IntSubsettableHistogram) buckets() tally.DurationBuckets { return i.tallyBuckets }
+func (i IntSubsettableHistogram) print(to func(string, ...any)) {
+	// fairly unreadable as duration-strings, so convert to int by hand
+	to("[%d]\n", int(i.tallyBuckets[0])) // zero value on its own row
+	for rowStart := 1; rowStart < i.len(); rowStart += i.width() {
+		ints := make([]int, 0, i.width())
+		for _, d := range i.tallyBuckets[rowStart : rowStart+i.width()] {
+			ints = append(ints, int(d))
+		}
+		to("%v\n", ints)
+	}
+}
+
+var _ histogrammy[SubsettableHistogram] = SubsettableHistogram{}
+var _ histogrammy[IntSubsettableHistogram] = IntSubsettableHistogram{}
+
+// internal utility/test methods, but could be exposed if there's a use for it
+type histogrammy[T any] interface {
+	histScale() int                 // exponential scale value.  0..3 inclusive.
+	width() int                     // number of values per power of 2 == how wide to print each row.  1, 2, 4, or 8.
+	len() int                       // number of buckets
+	start() time.Duration           // first non-zero bucket
+	end() time.Duration             // last bucket
+	buckets() tally.DurationBuckets // access to all buckets
+	subsetTo(newScale int) T        // generic so specific types can be returned
+	tags() map[string]string        // start, end, and scale tags that need to be implicitly added
+
+	print(to func(string, ...any)) // test-oriented printer
+}