Testing: Run sync measure benchmarks in parallel (#7113)

dashpole · web-flow · commit b7610a72a7a3 · 2025-08-06T08:50:18.000-04:00
I am looking into https://promlabs.com/blog/2025/07/17/why-i-recommend-native-prometheus-instrumentation-over-opentelemetry/#comparing-counter-increment-performance, which seems to suggest the OTel metrics SDK performs poorly when a counter is incremented concurrently. It is potentially a bit of an artificial benchmark, but does suggest there is some contention beyond just the fact that they are incrementing an atomic integer... Original benchmarks from the blog post: https://github.com/promlabs/prometheus-otel-benchmarks/blob/main/otel_test.go ``` $ go test -run=xxxxxMatchNothingxxxxx -cpu=24 -test.benchtime=1s -bench=BenchmarkSyncMeasure/NoView/ goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/0-24 3946789 313.2 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/1-24 3420992 374.4 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/10-24 574608 1745 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/0-24 3996166 281.1 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/1-24 3091573 367.1 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/10-24 705693 1660 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/0-24 4098727 296.4 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/1-24 3029276 355.4 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/10-24 605174 1803 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/0-24 4057765 298.6 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/1-24 3384812 366.9 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/10-24 714900 1742 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/0-24 3274644 364.3 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/1-24 3780115 316.1 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/10-24 1294364 993.5 ns/op 
BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/0-24 3543817 343.2 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/1-24 3523102 335.8 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/10-24 1329352 956.3 ns/op PASS ok go.opentelemetry.io/otel/sdk/metric 27.504s ``` ``` $ go test -run=xxxxxMatchNothingxxxxx -cpu=1 -test.benchtime=1s -bench=BenchmarkSyncMeasure/NoView/ goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/metric cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/0 9905773 121.3 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/1 4079145 296.5 ns/op BenchmarkSyncMeasure/NoView/Int64Counter/Attributes/10 781627 1531 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/0 10017988 120.2 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/1 4055418 296.4 ns/op BenchmarkSyncMeasure/NoView/Float64Counter/Attributes/10 761139 1540 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/0 10017126 121.1 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/1 4037232 295.3 ns/op BenchmarkSyncMeasure/NoView/Int64UpDownCounter/Attributes/10 757010 1539 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/0 10122925 119.0 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/1 4070942 293.8 ns/op BenchmarkSyncMeasure/NoView/Float64UpDownCounter/Attributes/10 788176 1542 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/0 10794142 110.8 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/1 5929494 201.0 ns/op BenchmarkSyncMeasure/NoView/Int64Histogram/Attributes/10 1449292 825.4 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/0 10875385 110.1 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/1 5903116 202.4 ns/op BenchmarkSyncMeasure/NoView/Float64Histogram/Attributes/10 1459578 827.4 ns/op PASS ok go.opentelemetry.io/otel/sdk/metric 25.688s ``` Results are significantly worse 
(more than 2x in some cases) with parallelism, but they don't initially seem as bad as the blog post suggests. I only have 24 cores, so I can't test higher levels of parallelism. Do we want to have parallel benchmarks in addition to our current non-parallel ones?
diff --git a/sdk/metric/benchmark_test.go b/sdk/metric/benchmark_test.go
@@ -113,19 +113,19 @@ func benchMeasAttrs(meas measF) func(*testing.B) {
 	return func(b *testing.B) {
 		b.Run("Attributes/0", func(b *testing.B) {
 			f := meas(*attribute.EmptySet())
-			b.ReportAllocs()
-			b.ResetTimer()
-			for n := 0; n < b.N; n++ {
-				f()
-			}
+			b.RunParallel(func(pb *testing.PB) {
+				for pb.Next() {
+					f()
+				}
+			})
 		})
 		b.Run("Attributes/1", func(b *testing.B) {
 			f := meas(attribute.NewSet(attribute.Bool("K", true)))
-			b.ReportAllocs()
-			b.ResetTimer()
-			for n := 0; n < b.N; n++ {
-				f()
-			}
+			b.RunParallel(func(pb *testing.PB) {
+				for pb.Next() {
+					f()
+				}
+			})
 		})
 		b.Run("Attributes/10", func(b *testing.B) {
 			n := 10
@@ -135,11 +135,11 @@ func benchMeasAttrs(meas measF) func(*testing.B) {
 				attrs = append(attrs, attribute.Int(strconv.Itoa(i), i))
 			}
 			f := meas(attribute.NewSet(attrs...))
-			b.ReportAllocs()
-			b.ResetTimer()
-			for n := 0; n < b.N; n++ {
-				f()
-			}
+			b.RunParallel(func(pb *testing.PB) {
+				for pb.Next() {
+					f()
+				}
+			})
 		})
 	}
 }