diff --git a/CHANGELOG.md b/CHANGELOG.md index 29d5010a21a..729c8872ccf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - The default `TranslationStrategy` in `go.opentelemetry.io/exporters/prometheus` is changed from `otlptranslator.NoUTF8EscapingWithSuffixes` to `otlptranslator.UnderscoreEscapingWithSuffixes`. (#7421) - The `ErrorType` function in `go.opentelemetry.io/otel/semconv/v1.37.0` now handles custom error types. If an error implements an `ErrorType() string` method, the return value of that method will be used as the error type. (#7442) +- Improve performance of concurrent measurements in `go.opentelemetry.io/otel/sdk/metric`. (#7427) diff --git a/sdk/metric/exemplar/fixed_size_reservoir.go b/sdk/metric/exemplar/fixed_size_reservoir.go index 6afb3bed3af..453278a0c38 100644 --- a/sdk/metric/exemplar/fixed_size_reservoir.go +++ b/sdk/metric/exemplar/fixed_size_reservoir.go @@ -7,6 +7,7 @@ import ( "context" "math" "math/rand/v2" + "sync" "time" "go.opentelemetry.io/otel/attribute" @@ -37,6 +38,7 @@ var _ Reservoir = &FixedSizeReservoir{} type FixedSizeReservoir struct { reservoir.ConcurrentSafe *storage + mu sync.Mutex // count is the number of measurement seen. count int64 @@ -192,6 +194,8 @@ func (r *FixedSizeReservoir) advance() { // // The Reservoir state is preserved after this call. func (r *FixedSizeReservoir) Collect(dest *[]Exemplar) { + r.mu.Lock() + defer r.mu.Unlock() r.storage.Collect(dest) // Call reset here even though it will reset r.count and restart the random // number series. This will persist any old exemplars as long as no new diff --git a/sdk/metric/exemplar/histogram_reservoir.go b/sdk/metric/exemplar/histogram_reservoir.go index 12cf8d36a63..60c871a4432 100644 --- a/sdk/metric/exemplar/histogram_reservoir.go +++ b/sdk/metric/exemplar/histogram_reservoir.go @@ -7,6 +7,7 @@ import ( "context" "slices" "sort" + "sync" "time" "go.opentelemetry.io/otel/attribute" @@ -42,6 +43,7 @@ var _ Reservoir = &HistogramReservoir{} type HistogramReservoir struct { reservoir.ConcurrentSafe *storage + mu sync.Mutex // bounds are bucket bounds in ascending order. bounds []float64 @@ -76,3 +78,12 @@ func (r *HistogramReservoir) Offer(ctx context.Context, t time.Time, v Value, a defer r.mu.Unlock() r.store(idx, m) } + +// Collect returns all the held exemplars. +// +// The Reservoir state is preserved after this call. +func (r *HistogramReservoir) Collect(dest *[]Exemplar) { + r.mu.Lock() + defer r.mu.Unlock() + r.storage.Collect(dest) +} diff --git a/sdk/metric/exemplar/storage.go b/sdk/metric/exemplar/storage.go index 760c3c87119..16b61c07dec 100644 --- a/sdk/metric/exemplar/storage.go +++ b/sdk/metric/exemplar/storage.go @@ -5,7 +5,6 @@ package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import ( "context" - "sync" "time" "go.opentelemetry.io/otel/attribute" @@ -14,7 +13,6 @@ import ( // storage is an exemplar storage for [Reservoir] implementations. type storage struct { - mu sync.Mutex // measurements are the measurements sampled. // // This does not use []metricdata.Exemplar because it potentially would @@ -34,8 +32,6 @@ func (r *storage) store(idx int, m measurement) { // // The Reservoir state is preserved after this call. func (r *storage) Collect(dest *[]Exemplar) { - r.mu.Lock() - defer r.mu.Unlock() *dest = reset(*dest, len(r.measurements), len(r.measurements)) var n int for _, m := range r.measurements { diff --git a/sdk/metric/internal/aggregate/aggregate.go b/sdk/metric/internal/aggregate/aggregate.go index 0321da68150..2b60410801b 100644 --- a/sdk/metric/internal/aggregate/aggregate.go +++ b/sdk/metric/internal/aggregate/aggregate.go @@ -110,12 +110,13 @@ func (b Builder[N]) PrecomputedSum(monotonic bool) (Measure[N], ComputeAggregati // Sum returns a sum aggregate function input and output. func (b Builder[N]) Sum(monotonic bool) (Measure[N], ComputeAggregation) { - s := newSum[N](monotonic, b.AggregationLimit, b.resFunc()) switch b.Temporality { case metricdata.DeltaTemporality: - return b.filter(s.measure), s.delta + s := newDeltaSum[N](monotonic, b.AggregationLimit, b.resFunc()) + return b.filter(s.measure), s.collect default: - return b.filter(s.measure), s.cumulative + s := newCumulativeSum[N](monotonic, b.AggregationLimit, b.resFunc()) + return b.filter(s.measure), s.collect } } diff --git a/sdk/metric/internal/aggregate/atomic.go b/sdk/metric/internal/aggregate/atomic.go new file mode 100644 index 00000000000..0fa6d3c6fa8 --- /dev/null +++ b/sdk/metric/internal/aggregate/atomic.go @@ -0,0 +1,184 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggregate" + +import ( + "math" + "runtime" + "sync" + "sync/atomic" + + "go.opentelemetry.io/otel/attribute" +) + +// atomicCounter is an efficient way of adding to a number which is either an +// int64 or float64. It is designed to be efficient when adding whole +// numbers, regardless of whether N is an int64 or float64. +// +// Inspired by the Prometheus counter implementation: +// https://github.com/prometheus/client_golang/blob/14ccb93091c00f86b85af7753100aa372d63602b/prometheus/counter.go#L108 +type atomicCounter[N int64 | float64] struct { + // nFloatBits contains only the non-integer portion of the counter. + nFloatBits atomic.Uint64 + // nInt contains only the integer portion of the counter. + nInt atomic.Int64 +} + +// load returns the current value. The caller must ensure all calls to add have +// returned prior to calling load. +func (n *atomicCounter[N]) load() N { + fval := math.Float64frombits(n.nFloatBits.Load()) + ival := n.nInt.Load() + return N(fval + float64(ival)) +} + +func (n *atomicCounter[N]) add(value N) { + ival := int64(value) + // This case is where the value is an int, or if it is a whole-numbered float. + if float64(ival) == float64(value) { + n.nInt.Add(ival) + return + } + + // Value must be a float below. + for { + oldBits := n.nFloatBits.Load() + newBits := math.Float64bits(math.Float64frombits(oldBits) + float64(value)) + if n.nFloatBits.CompareAndSwap(oldBits, newBits) { + return + } + } +} + +// hotColdWaitGroup is a synchronization primitive which enables lockless +// writes for concurrent writers and enables a reader to acquire exclusive +// access to a snapshot of state including only completed operations. +// Conceptually, it can be thought of as a "hot" wait group, +// and a "cold" wait group, with the ability for the reader to atomically swap +// the hot and cold wait groups, and wait for the now-cold wait group to +// complete. +// +// Inspired by the prometheus/client_golang histogram implementation: +// https://github.com/prometheus/client_golang/blob/a974e0d45e0aa54c65492559114894314d8a2447/prometheus/histogram.go#L725 +// +// Usage: +// +// var hcwg hotColdWaitGroup +// var data [2]any +// +// func write() { +// hotIdx := hcwg.start() +// defer hcwg.done(hotIdx) +// // modify data without locking +// data[hotIdx].update() +// } +// +// func read() { +// coldIdx := hcwg.swapHotAndWait() +// // read data now that all writes to the cold data have completed. +// data[coldIdx].read() +// } +type hotColdWaitGroup struct { + // startedCountAndHotIdx contains a 63-bit counter in the lower bits, + // and a 1 bit hot index to denote which of the two data-points new + // measurements to write to. These are contained together so that read() + // can atomically swap the hot bit, reset the started writes to zero, and + // read the number writes that were started prior to the hot bit being + // swapped. + startedCountAndHotIdx atomic.Uint64 + // endedCounts is the number of writes that have completed to each + // dataPoint. + endedCounts [2]atomic.Uint64 +} + +// start returns the hot index that the writer should write to. The returned +// hot index is 0 or 1. The caller must call done(hot index) after it finishes +// its operation. start() is safe to call concurrently with other methods. +func (l *hotColdWaitGroup) start() uint64 { + // We increment h.startedCountAndHotIdx so that the counter in the lower + // 63 bits gets incremented. At the same time, we get the new value + // back, which we can use to return the currently-hot index. + return l.startedCountAndHotIdx.Add(1) >> 63 +} + +// done signals to the reader that an operation has fully completed. +// done is safe to call concurrently. +func (l *hotColdWaitGroup) done(hotIdx uint64) { + l.endedCounts[hotIdx].Add(1) +} + +// swapHotAndWait swaps the hot bit, waits for all start() calls to be done(), +// and then returns the now-cold index for the reader to read from. The +// returned index is 0 or 1. swapHotAndWait must not be called concurrently. +func (l *hotColdWaitGroup) swapHotAndWait() uint64 { + n := l.startedCountAndHotIdx.Load() + coldIdx := (^n) >> 63 + // Swap the hot and cold index while resetting the started measurements + // count to zero. + n = l.startedCountAndHotIdx.Swap((coldIdx << 63)) + hotIdx := n >> 63 + startedCount := n & ((1 << 63) - 1) + // Wait for all measurements to the previously-hot map to finish. + for startedCount != l.endedCounts[hotIdx].Load() { + runtime.Gosched() // Let measurements complete. + } + // reset the number of ended operations + l.endedCounts[hotIdx].Store(0) + return hotIdx +} + +// limitedSyncMap is a sync.Map which enforces the aggregation limit on +// attribute sets and provides a Len() function. +type limitedSyncMap struct { + sync.Map + aggLimit int + len int + lenMux sync.Mutex +} + +func (m *limitedSyncMap) LoadOrStoreAttr(fltrAttr attribute.Set, newValue func(attribute.Set) any) any { + actual, loaded := m.Load(fltrAttr.Equivalent()) + if loaded { + return actual + } + // If the overflow set exists, assume we have already overflowed and don't + // bother with the slow path below. + actual, loaded = m.Load(overflowSet.Equivalent()) + if loaded { + return actual + } + // Slow path: add a new attribute set. + m.lenMux.Lock() + defer m.lenMux.Unlock() + + // re-fetch now that we hold the lock to ensure we don't use the overflow + // set unless we are sure the attribute set isn't being written + // concurrently. + actual, loaded = m.Load(fltrAttr.Equivalent()) + if loaded { + return actual + } + + if m.aggLimit > 0 && m.len >= m.aggLimit-1 { + fltrAttr = overflowSet + } + actual, loaded = m.LoadOrStore(fltrAttr.Equivalent(), newValue(fltrAttr)) + if !loaded { + m.len++ + } + return actual +} + +func (m *limitedSyncMap) Clear() { + m.lenMux.Lock() + defer m.lenMux.Unlock() + m.len = 0 + m.Map.Clear() +} + +func (m *limitedSyncMap) Len() int { + m.lenMux.Lock() + defer m.lenMux.Unlock() + return m.len +} diff --git a/sdk/metric/internal/aggregate/atomic_test.go b/sdk/metric/internal/aggregate/atomic_test.go new file mode 100644 index 00000000000..52f053248d7 --- /dev/null +++ b/sdk/metric/internal/aggregate/atomic_test.go @@ -0,0 +1,78 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggregate" + +import ( + "math" + "sync" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAtomicSumAddFloatConcurrentSafe(t *testing.T) { + var wg sync.WaitGroup + var aSum atomicCounter[float64] + for _, in := range []float64{ + 0.2, + 0.25, + 1.6, + 10.55, + 42.4, + } { + wg.Add(1) + go func() { + defer wg.Done() + aSum.add(in) + }() + } + wg.Wait() + assert.Equal(t, float64(55), math.Round(aSum.load())) +} + +func TestAtomicSumAddIntConcurrentSafe(t *testing.T) { + var wg sync.WaitGroup + var aSum atomicCounter[int64] + for _, in := range []int64{ + 1, + 2, + 3, + 4, + 5, + } { + wg.Add(1) + go func() { + defer wg.Done() + aSum.add(in) + }() + } + wg.Wait() + assert.Equal(t, int64(15), aSum.load()) +} + +func TestHotColdWaitGroupConcurrentSafe(t *testing.T) { + var wg sync.WaitGroup + hcwg := &hotColdWaitGroup{} + var data [2]uint64 + for range 5 { + wg.Add(1) + go func() { + defer wg.Done() + hotIdx := hcwg.start() + defer hcwg.done(hotIdx) + atomic.AddUint64(&data[hotIdx], 1) + }() + } + for range 2 { + readIdx := hcwg.swapHotAndWait() + assert.NotPanics(t, func() { + // reading without using atomics should not panic since we are + // reading from the cold element, and have waited for all writes to + // finish. + t.Logf("read value %+v", data[readIdx]) + }) + } + wg.Wait() +} diff --git a/sdk/metric/internal/aggregate/exponential_histogram.go b/sdk/metric/internal/aggregate/exponential_histogram.go index cf4e86acf98..5b3a19c067d 100644 --- a/sdk/metric/internal/aggregate/exponential_histogram.go +++ b/sdk/metric/internal/aggregate/exponential_histogram.go @@ -301,7 +301,7 @@ func newExponentialHistogram[N int64 | float64]( maxScale: maxScale, newRes: r, - limit: newLimiter[*expoHistogramDataPoint[N]](limit), + limit: newLimiter[expoHistogramDataPoint[N]](limit), values: make(map[attribute.Distinct]*expoHistogramDataPoint[N]), start: now(), @@ -317,7 +317,7 @@ type expoHistogram[N int64 | float64] struct { maxScale int32 newRes func(attribute.Set) FilteredExemplarReservoir[N] - limit limiter[*expoHistogramDataPoint[N]] + limit limiter[expoHistogramDataPoint[N]] values map[attribute.Distinct]*expoHistogramDataPoint[N] valuesMu sync.Mutex diff --git a/sdk/metric/internal/aggregate/filtered_reservoir.go b/sdk/metric/internal/aggregate/filtered_reservoir.go index b42dfd357ea..e4f9409bc80 100644 --- a/sdk/metric/internal/aggregate/filtered_reservoir.go +++ b/sdk/metric/internal/aggregate/filtered_reservoir.go @@ -54,12 +54,12 @@ func NewFilteredExemplarReservoir[N int64 | float64]( func (f *filteredExemplarReservoir[N]) Offer(ctx context.Context, val N, attr []attribute.KeyValue) { if f.filter(ctx) { + // only record the current time if we are sampling this measurement. ts := time.Now() if !f.concurrentSafe { f.reservoirMux.Lock() defer f.reservoirMux.Unlock() } - // only record the current time if we are sampling this measurement. f.reservoir.Offer(ctx, ts, exemplar.NewValue(val), attr) } } diff --git a/sdk/metric/internal/aggregate/histogram.go b/sdk/metric/internal/aggregate/histogram.go index 639c4d002c9..a094519cf6d 100644 --- a/sdk/metric/internal/aggregate/histogram.go +++ b/sdk/metric/internal/aggregate/histogram.go @@ -52,7 +52,7 @@ type histValues[N int64 | float64] struct { bounds []float64 newRes func(attribute.Set) FilteredExemplarReservoir[N] - limit limiter[*buckets[N]] + limit limiter[buckets[N]] values map[attribute.Distinct]*buckets[N] valuesMu sync.Mutex } @@ -74,7 +74,7 @@ func newHistValues[N int64 | float64]( noSum: noSum, bounds: b, newRes: r, - limit: newLimiter[*buckets[N]](limit), + limit: newLimiter[buckets[N]](limit), values: make(map[attribute.Distinct]*buckets[N]), } } diff --git a/sdk/metric/internal/aggregate/lastvalue.go b/sdk/metric/internal/aggregate/lastvalue.go index 6faf4920c70..3e2ed741505 100644 --- a/sdk/metric/internal/aggregate/lastvalue.go +++ b/sdk/metric/internal/aggregate/lastvalue.go @@ -23,7 +23,7 @@ func newLastValue[N int64 | float64](limit int, r func(attribute.Set) FilteredEx return &lastValue[N]{ newRes: r, limit: newLimiter[datapoint[N]](limit), - values: make(map[attribute.Distinct]datapoint[N]), + values: make(map[attribute.Distinct]*datapoint[N]), start: now(), } } @@ -34,7 +34,7 @@ type lastValue[N int64 | float64] struct { newRes func(attribute.Set) FilteredExemplarReservoir[N] limit limiter[datapoint[N]] - values map[attribute.Distinct]datapoint[N] + values map[attribute.Distinct]*datapoint[N] start time.Time } @@ -45,9 +45,10 @@ func (s *lastValue[N]) measure(ctx context.Context, value N, fltrAttr attribute. d, ok := s.values[fltrAttr.Equivalent()] if !ok { fltrAttr = s.limit.Attributes(fltrAttr, s.values) - d = s.values[fltrAttr.Equivalent()] - d.res = s.newRes(fltrAttr) - d.attrs = fltrAttr + d = &datapoint[N]{ + res: s.newRes(fltrAttr), + attrs: fltrAttr, + } } d.value = value diff --git a/sdk/metric/internal/aggregate/limit.go b/sdk/metric/internal/aggregate/limit.go index 9ea0251edd7..c19a1aff68f 100644 --- a/sdk/metric/internal/aggregate/limit.go +++ b/sdk/metric/internal/aggregate/limit.go @@ -30,7 +30,7 @@ func newLimiter[V any](aggregation int) limiter[V] { // aggregation cardinality limit for the existing measurements. If it will, // overflowSet is returned. Otherwise, if it will not exceed the limit, or the // limit is not set (limit <= 0), attr is returned. -func (l limiter[V]) Attributes(attrs attribute.Set, measurements map[attribute.Distinct]V) attribute.Set { +func (l limiter[V]) Attributes(attrs attribute.Set, measurements map[attribute.Distinct]*V) attribute.Set { if l.aggLimit > 0 { _, exists := measurements[attrs.Equivalent()] if !exists && len(measurements) >= l.aggLimit-1 { diff --git a/sdk/metric/internal/aggregate/limit_test.go b/sdk/metric/internal/aggregate/limit_test.go index c61bae0e24f..236c1af43af 100644 --- a/sdk/metric/internal/aggregate/limit_test.go +++ b/sdk/metric/internal/aggregate/limit_test.go @@ -12,7 +12,8 @@ import ( ) func TestLimiterAttributes(t *testing.T) { - m := map[attribute.Distinct]struct{}{alice.Equivalent(): {}} + var val struct{} + m := map[attribute.Distinct]*struct{}{alice.Equivalent(): &val} t.Run("NoLimit", func(t *testing.T) { l := newLimiter[struct{}](0) assert.Equal(t, alice, l.Attributes(alice, m)) @@ -43,7 +44,8 @@ func TestLimiterAttributes(t *testing.T) { var limitedAttr attribute.Set func BenchmarkLimiterAttributes(b *testing.B) { - m := map[attribute.Distinct]struct{}{alice.Equivalent(): {}} + var val struct{} + m := map[attribute.Distinct]*struct{}{alice.Equivalent(): &val} l := newLimiter[struct{}](2) b.ReportAllocs() diff --git a/sdk/metric/internal/aggregate/sum.go b/sdk/metric/internal/aggregate/sum.go index 164feb86797..81690855114 100644 --- a/sdk/metric/internal/aggregate/sum.go +++ b/sdk/metric/internal/aggregate/sum.go @@ -5,7 +5,6 @@ package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggreg import ( "context" - "sync" "time" "go.opentelemetry.io/otel/attribute" @@ -13,65 +12,75 @@ import ( ) type sumValue[N int64 | float64] struct { - n N + n atomicCounter[N] res FilteredExemplarReservoir[N] attrs attribute.Set } -// valueMap is the storage for sums. type valueMap[N int64 | float64] struct { - sync.Mutex + values limitedSyncMap newRes func(attribute.Set) FilteredExemplarReservoir[N] - limit limiter[sumValue[N]] - values map[attribute.Distinct]sumValue[N] } -func newValueMap[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *valueMap[N] { - return &valueMap[N]{ - newRes: r, - limit: newLimiter[sumValue[N]](limit), - values: make(map[attribute.Distinct]sumValue[N]), - } -} - -func (s *valueMap[N]) measure(ctx context.Context, value N, fltrAttr attribute.Set, droppedAttr []attribute.KeyValue) { - s.Lock() - defer s.Unlock() - - v, ok := s.values[fltrAttr.Equivalent()] - if !ok { - fltrAttr = s.limit.Attributes(fltrAttr, s.values) - v = s.values[fltrAttr.Equivalent()] - v.res = s.newRes(fltrAttr) - v.attrs = fltrAttr - } - - v.n += value - v.res.Offer(ctx, value, droppedAttr) - - s.values[fltrAttr.Equivalent()] = v +func (s *valueMap[N]) measure( + ctx context.Context, + value N, + fltrAttr attribute.Set, + droppedAttr []attribute.KeyValue, +) { + sv := s.values.LoadOrStoreAttr(fltrAttr, func(attr attribute.Set) any { + return &sumValue[N]{ + res: s.newRes(attr), + attrs: attr, + } + }).(*sumValue[N]) + sv.n.add(value) + // It is possible for collection to race with measurement and observe the + // exemplar in the batch of metrics after the add() for cumulative sums. + // This is an accepted tradeoff to avoid locking during measurement. + sv.res.Offer(ctx, value, droppedAttr) } -// newSum returns an aggregator that summarizes a set of measurements as their -// arithmetic sum. Each sum is scoped by attributes and the aggregation cycle -// the measurements were made in. -func newSum[N int64 | float64](monotonic bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *sum[N] { - return &sum[N]{ - valueMap: newValueMap[N](limit, r), +// newDeltaSum returns an aggregator that summarizes a set of measurements as +// their arithmetic sum. Each sum is scoped by attributes and the aggregation +// cycle the measurements were made in. +func newDeltaSum[N int64 | float64]( + monotonic bool, + limit int, + r func(attribute.Set) FilteredExemplarReservoir[N], +) *deltaSum[N] { + return &deltaSum[N]{ monotonic: monotonic, start: now(), + hotColdValMap: [2]valueMap[N]{ + { + values: limitedSyncMap{aggLimit: limit}, + newRes: r, + }, + { + values: limitedSyncMap{aggLimit: limit}, + newRes: r, + }, + }, } } -// sum summarizes a set of measurements made as their arithmetic sum. -type sum[N int64 | float64] struct { - *valueMap[N] - +// deltaSum is the storage for sums which resets every collection interval. +type deltaSum[N int64 | float64] struct { monotonic bool start time.Time + + hcwg hotColdWaitGroup + hotColdValMap [2]valueMap[N] +} + +func (s *deltaSum[N]) measure(ctx context.Context, value N, fltrAttr attribute.Set, droppedAttr []attribute.KeyValue) { + hotIdx := s.hcwg.start() + defer s.hcwg.done(hotIdx) + s.hotColdValMap[hotIdx].measure(ctx, value, fltrAttr, droppedAttr) } -func (s *sum[N]) delta( +func (s *deltaSum[N]) collect( dest *metricdata.Aggregation, //nolint:gocritic // The pointer is needed for the ComputeAggregation interface ) int { t := now() @@ -82,33 +91,61 @@ func (s *sum[N]) delta( sData.Temporality = metricdata.DeltaTemporality sData.IsMonotonic = s.monotonic - s.Lock() - defer s.Unlock() - - n := len(s.values) + // delta always clears values on collection + readIdx := s.hcwg.swapHotAndWait() + // The len will not change while we iterate over values, since we waited + // for all writes to finish to the cold values and len. + n := s.hotColdValMap[readIdx].values.Len() dPts := reset(sData.DataPoints, n, n) var i int - for _, val := range s.values { + s.hotColdValMap[readIdx].values.Range(func(_, value any) bool { + val := value.(*sumValue[N]) + collectExemplars(&dPts[i].Exemplars, val.res.Collect) dPts[i].Attributes = val.attrs dPts[i].StartTime = s.start dPts[i].Time = t - dPts[i].Value = val.n - collectExemplars(&dPts[i].Exemplars, val.res.Collect) + dPts[i].Value = val.n.load() i++ - } - // Do not report stale values. - clear(s.values) + return true + }) + s.hotColdValMap[readIdx].values.Clear() // The delta collection cycle resets. s.start = t sData.DataPoints = dPts *dest = sData - return n + return i +} + +// newCumulativeSum returns an aggregator that summarizes a set of measurements +// as their arithmetic sum. Each sum is scoped by attributes and the +// aggregation cycle the measurements were made in. +func newCumulativeSum[N int64 | float64]( + monotonic bool, + limit int, + r func(attribute.Set) FilteredExemplarReservoir[N], +) *cumulativeSum[N] { + return &cumulativeSum[N]{ + monotonic: monotonic, + start: now(), + valueMap: valueMap[N]{ + values: limitedSyncMap{aggLimit: limit}, + newRes: r, + }, + } } -func (s *sum[N]) cumulative( +// deltaSum is the storage for sums which never reset. +type cumulativeSum[N int64 | float64] struct { + monotonic bool + start time.Time + + valueMap[N] +} + +func (s *cumulativeSum[N]) collect( dest *metricdata.Aggregation, //nolint:gocritic // The pointer is needed for the ComputeAggregation interface ) int { t := now() @@ -119,30 +156,33 @@ func (s *sum[N]) cumulative( sData.Temporality = metricdata.CumulativeTemporality sData.IsMonotonic = s.monotonic - s.Lock() - defer s.Unlock() - - n := len(s.values) - dPts := reset(sData.DataPoints, n, n) + // Values are being concurrently written while we iterate, so only use the + // current length for capacity. + dPts := reset(sData.DataPoints, 0, s.values.Len()) var i int - for _, value := range s.values { - dPts[i].Attributes = value.attrs - dPts[i].StartTime = s.start - dPts[i].Time = t - dPts[i].Value = value.n - collectExemplars(&dPts[i].Exemplars, value.res.Collect) + s.values.Range(func(_, value any) bool { + val := value.(*sumValue[N]) + newPt := metricdata.DataPoint[N]{ + Attributes: val.attrs, + StartTime: s.start, + Time: t, + Value: val.n.load(), + } + collectExemplars(&newPt.Exemplars, val.res.Collect) + dPts = append(dPts, newPt) // TODO (#3006): This will use an unbounded amount of memory if there // are unbounded number of attribute sets being aggregated. Attribute // sets that become "stale" need to be forgotten so this will not // overload the system. i++ - } + return true + }) sData.DataPoints = dPts *dest = sData - return n + return i } // newPrecomputedSum returns an aggregator that summarizes a set of @@ -154,27 +194,22 @@ func newPrecomputedSum[N int64 | float64]( r func(attribute.Set) FilteredExemplarReservoir[N], ) *precomputedSum[N] { return &precomputedSum[N]{ - valueMap: newValueMap[N](limit, r), - monotonic: monotonic, - start: now(), + deltaSum: newDeltaSum(monotonic, limit, r), } } // precomputedSum summarizes a set of observations as their arithmetic sum. type precomputedSum[N int64 | float64] struct { - *valueMap[N] + *deltaSum[N] - monotonic bool - start time.Time - - reported map[attribute.Distinct]N + reported map[any]N } func (s *precomputedSum[N]) delta( dest *metricdata.Aggregation, //nolint:gocritic // The pointer is needed for the ComputeAggregation interface ) int { t := now() - newReported := make(map[attribute.Distinct]N) + newReported := make(map[any]N) // If *dest is not a metricdata.Sum, memory reuse is missed. In that case, // use the zero-value sData and hope for better alignment next cycle. @@ -182,27 +217,29 @@ func (s *precomputedSum[N]) delta( sData.Temporality = metricdata.DeltaTemporality sData.IsMonotonic = s.monotonic - s.Lock() - defer s.Unlock() - - n := len(s.values) + // delta always clears values on collection + readIdx := s.hcwg.swapHotAndWait() + // The len will not change while we iterate over values, since we waited + // for all writes to finish to the cold values and len. + n := s.hotColdValMap[readIdx].values.Len() dPts := reset(sData.DataPoints, n, n) var i int - for key, value := range s.values { - delta := value.n - s.reported[key] + s.hotColdValMap[readIdx].values.Range(func(key, value any) bool { + val := value.(*sumValue[N]) + n := val.n.load() - dPts[i].Attributes = value.attrs + delta := n - s.reported[key] + collectExemplars(&dPts[i].Exemplars, val.res.Collect) + dPts[i].Attributes = val.attrs dPts[i].StartTime = s.start dPts[i].Time = t dPts[i].Value = delta - collectExemplars(&dPts[i].Exemplars, value.res.Collect) - - newReported[key] = value.n + newReported[key] = n i++ - } - // Unused attribute sets do not report. - clear(s.values) + return true + }) + s.hotColdValMap[readIdx].values.Clear() s.reported = newReported // The delta collection cycle resets. s.start = t @@ -210,7 +247,7 @@ func (s *precomputedSum[N]) delta( sData.DataPoints = dPts *dest = sData - return n + return i } func (s *precomputedSum[N]) cumulative( @@ -224,27 +261,28 @@ func (s *precomputedSum[N]) cumulative( sData.Temporality = metricdata.CumulativeTemporality sData.IsMonotonic = s.monotonic - s.Lock() - defer s.Unlock() - - n := len(s.values) + // cumulative precomputed always clears values on collection + readIdx := s.hcwg.swapHotAndWait() + // The len will not change while we iterate over values, since we waited + // for all writes to finish to the cold values and len. + n := s.hotColdValMap[readIdx].values.Len() dPts := reset(sData.DataPoints, n, n) var i int - for _, val := range s.values { + s.hotColdValMap[readIdx].values.Range(func(_, value any) bool { + val := value.(*sumValue[N]) + collectExemplars(&dPts[i].Exemplars, val.res.Collect) dPts[i].Attributes = val.attrs dPts[i].StartTime = s.start dPts[i].Time = t - dPts[i].Value = val.n - collectExemplars(&dPts[i].Exemplars, val.res.Collect) - + dPts[i].Value = val.n.load() i++ - } - // Unused attribute sets do not report. - clear(s.values) + return true + }) + s.hotColdValMap[readIdx].values.Clear() sData.DataPoints = dPts *dest = sData - return n + return i }