Skip to content

Commit 075e523

Browse files
committed
microbench-ci: compare only outer run when insignificant
Previously, during the comparison step, all runs were compared. Since the last run determines whether the previous runs had significant changes, we now first compare only the last outer run of each benchmark. If the last run had significant changes, we then compare all runs to produce the final assessment. This prevents a compare summary on CI from showing a regression when in fact only one run possibly had a regression, followed by an insignificant change in the last run. Epic: None Release note: None
1 parent d9bbf60 commit 075e523

File tree

2 files changed

+36
-93
lines changed

2 files changed

+36
-93
lines changed

pkg/cmd/microbench-ci/compare.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,27 @@ func (b *Benchmark) compare(lines int) (*CompareResult, error) {
121121
return &compareResult, nil
122122
}
123123

124-
// compareBenchmarks compares the metrics of all benchmarks between two revisions.
124+
// compareBenchmarks compares the metrics of all benchmarks between two
125+
// revisions. It first compares only the last outer run of each benchmark. If
126+
// the last run had significant changes, it compares the metrics of all runs.
127+
// This is because the last run would only have completed with significant
128+
// changes if all the previous runs had them as well, and then we want to
129+
// include it in the final assessment. In contrast, if the last run had no
130+
// significant changes, it is possible that the previous runs had significant
131+
// changes, and we don't want to include them in the final assessment.
125132
func (b Benchmarks) compareBenchmarks() (CompareResults, error) {
126133
compareResults := make(CompareResults, 0, len(b))
127134
for _, benchmark := range b {
128-
compareResult, err := benchmark.compare(0)
135+
compareResult, err := benchmark.compare(benchmark.Count)
129136
if err != nil {
130137
return nil, err
131138
}
139+
if compareResult.top() != NoChange {
140+
compareResult, err = benchmark.compare(0)
141+
if err != nil {
142+
return nil, err
143+
}
144+
}
132145
compareResults = append(compareResults, compareResult)
133146
}
134147
return compareResults, nil

pkg/cmd/microbench-ci/testdata/summary.txt

Lines changed: 21 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ run group=1
7272

7373
| Metric | Old Commit | New Commit | Delta | Note |
7474
|-----------------------------|----------------|----------------|------------|--------------|
75-
| ⚪ **sec/op** | 9.852m ±0% | 9.880m ±1% | ~ | p=0.084 n=20 |
76-
| ⚪ **allocs/op** | 10.38k ±0% | 10.38k ±0% | ~ | p=1.000 n=20 |
75+
| ⚪ **sec/op** | 9.852m ±0% | 9.852m ±0% | ~ | p=1.000 n=10 |
76+
| ⚪ **allocs/op** | 10.38k ±1% | 10.38k ±1% | ~ | p=1.000 n=10 |
7777

7878
<details><summary>Reproduce</summary>
7979

@@ -161,32 +161,22 @@ json
161161
"Metric": "B/op",
162162
"Summary": {
163163
"Center": 2367667,
164-
"Lo": 2364281,
165-
"Hi": 2369187,
166-
"Confidence": 0.95861,
164+
"Lo": 2358650,
165+
"Hi": 2370670,
166+
"Confidence": 0.97852,
167167
"Warnings": null
168168
},
169169
"Sample": {
170170
"Values": [
171171
2352326,
172-
2352326,
173-
2358650,
174172
2358650,
175173
2364281,
176-
2364281,
177-
2365463,
178174
2365463,
179175
2367582,
180-
2367582,
181-
2367752,
182176
2367752,
183177
2368213,
184-
2368213,
185178
2369187,
186-
2369187,
187-
2370670,
188179
2370670,
189-
2375306,
190180
2375306
191181
],
192182
"Thresholds": {
@@ -199,32 +189,22 @@ json
199189
"Metric": "allocs/op",
200190
"Summary": {
201191
"Center": 10378.50000,
202-
"Lo": 10361,
203-
"Hi": 10392,
204-
"Confidence": 0.95861,
192+
"Lo": 10287,
193+
"Hi": 10398,
194+
"Confidence": 0.97852,
205195
"Warnings": null
206196
},
207197
"Sample": {
208198
"Values": [
209199
10246,
210-
10246,
211-
10287,
212200
10287,
213201
10361,
214-
10361,
215-
10377,
216202
10377,
217203
10378,
218-
10378,
219204
10379,
220-
10379,
221-
10386,
222205
10386,
223206
10392,
224-
10392,
225-
10398,
226207
10398,
227-
10411,
228208
10411
229209
],
230210
"Thresholds": {
@@ -236,10 +216,10 @@ json
236216
{
237217
"Metric": "sec/op",
238218
"Summary": {
239-
"Center": 0.00988,
219+
"Center": 0.00985,
240220
"Lo": 0.00985,
241-
"Hi": 0.00995,
242-
"Confidence": 0.95861,
221+
"Hi": 0.00985,
222+
"Confidence": 0.97852,
243223
"Warnings": null
244224
},
245225
"Sample": {
@@ -253,17 +233,7 @@ json
253233
0.00985,
254234
0.00985,
255235
0.00985,
256-
0.00985,
257-
0.00991,
258-
0.00993,
259-
0.00995,
260-
0.00995,
261-
0.00995,
262-
0.00995,
263-
0.00997,
264-
0.00998,
265-
0.00998,
266-
0.01000
236+
0.00985
267237
],
268238
"Thresholds": {
269239
"CompareAlpha": 0.05000
@@ -277,32 +247,22 @@ json
277247
"Metric": "B/op",
278248
"Summary": {
279249
"Center": 2367667,
280-
"Lo": 2364281,
281-
"Hi": 2369187,
282-
"Confidence": 0.95861,
250+
"Lo": 2358650,
251+
"Hi": 2370670,
252+
"Confidence": 0.97852,
283253
"Warnings": null
284254
},
285255
"Sample": {
286256
"Values": [
287257
2352326,
288-
2352326,
289-
2358650,
290258
2358650,
291259
2364281,
292-
2364281,
293260
2365463,
294-
2365463,
295-
2367582,
296261
2367582,
297262
2367752,
298-
2367752,
299263
2368213,
300-
2368213,
301-
2369187,
302264
2369187,
303265
2370670,
304-
2370670,
305-
2375306,
306266
2375306
307267
],
308268
"Thresholds": {
@@ -315,32 +275,22 @@ json
315275
"Metric": "allocs/op",
316276
"Summary": {
317277
"Center": 10378.50000,
318-
"Lo": 10361,
319-
"Hi": 10392,
320-
"Confidence": 0.95861,
278+
"Lo": 10287,
279+
"Hi": 10398,
280+
"Confidence": 0.97852,
321281
"Warnings": null
322282
},
323283
"Sample": {
324284
"Values": [
325-
10246,
326285
10246,
327286
10287,
328-
10287,
329-
10361,
330287
10361,
331288
10377,
332-
10377,
333289
10378,
334-
10378,
335-
10379,
336290
10379,
337291
10386,
338-
10386,
339-
10392,
340292
10392,
341293
10398,
342-
10398,
343-
10411,
344294
10411
345295
],
346296
"Thresholds": {
@@ -355,21 +305,11 @@ json
355305
"Center": 0,
356306
"Lo": 0,
357307
"Hi": 0,
358-
"Confidence": 0.95861,
308+
"Confidence": 0.97852,
359309
"Warnings": null
360310
},
361311
"Sample": {
362312
"Values": [
363-
0,
364-
0,
365-
0,
366-
0,
367-
0,
368-
0,
369-
0,
370-
0,
371-
0,
372-
0,
373313
0,
374314
0,
375315
0,
@@ -393,17 +333,11 @@ json
393333
"Center": 0.00985,
394334
"Lo": 0.00985,
395335
"Hi": 0.00985,
396-
"Confidence": 0.95861,
336+
"Confidence": 0.97852,
397337
"Warnings": null
398338
},
399339
"Sample": {
400340
"Values": [
401-
0.00981,
402-
0.00985,
403-
0.00985,
404-
0.00985,
405-
0.00985,
406-
0.00985,
407341
0.00985,
408342
0.00985,
409343
0.00985,
@@ -413,11 +347,7 @@ json
413347
0.00985,
414348
0.00985,
415349
0.00985,
416-
0.00987,
417-
0.00988,
418-
0.00990,
419-
0.00993,
420-
0.00998
350+
0.00985
421351
],
422352
"Thresholds": {
423353
"CompareAlpha": 0.05000

0 commit comments

Comments
 (0)