|
| 1 | +// Copyright 2025 The Cockroach Authors. |
| 2 | +// |
| 3 | +// Use of this software is governed by the CockroachDB Software License |
| 4 | +// included in the /LICENSE file. |
| 5 | +// |
| 6 | + |
| 7 | +package model |
| 8 | + |
| 9 | +import ( |
| 10 | + "math" |
| 11 | + "math/rand" |
| 12 | + "sort" |
| 13 | +) |
| 14 | + |
const (
	// resampleCount is the number of bootstrap rounds performed when
	// estimating a confidence interval.
	resampleCount = 1000
	// confidence is the two-sided confidence level of the interval; the
	// remaining probability mass is split evenly between both tails.
	confidence = 0.95
)
| 17 | + |
| 18 | +// calculateConfidenceInterval calculates the confidence interval for the ratio |
| 19 | +// of two sets of values. The confidence interval is calculated using a |
| 20 | +// bootstrap method. |
| 21 | +func calculateConfidenceInterval(newValues, oldValues []float64) ConfidenceInterval { |
| 22 | + rng := rand.New(rand.NewSource(hash(newValues) + hash(oldValues))) |
| 23 | + ratios := make([]float64, 0, resampleCount) |
| 24 | + resNew := make([]float64, len(newValues)) |
| 25 | + resOld := make([]float64, len(oldValues)) |
| 26 | + for range resampleCount { |
| 27 | + resample(rng, newValues, resNew) |
| 28 | + sort.Float64s(resNew) |
| 29 | + resample(rng, oldValues, resOld) |
| 30 | + sort.Float64s(resOld) |
| 31 | + |
| 32 | + medOld := median(resOld) |
| 33 | + // Skip if the old median is 0 to avoid division by zero. |
| 34 | + if medOld != 0 { |
| 35 | + ratios = append(ratios, median(resNew)/medOld) |
| 36 | + } |
| 37 | + } |
| 38 | + if len(ratios) == 0 { |
| 39 | + return ConfidenceInterval{} |
| 40 | + } |
| 41 | + sort.Float64s(ratios) |
| 42 | + alpha := (1.0 - confidence) / 2.0 |
| 43 | + lowerIndex := int(math.Floor(float64(len(ratios)) * alpha)) |
| 44 | + upperIndex := int(math.Floor(float64(len(ratios)) * (1.0 - alpha))) |
| 45 | + return ConfidenceInterval{ |
| 46 | + Low: ratios[lowerIndex], |
| 47 | + High: ratios[upperIndex], |
| 48 | + Center: median(ratios), |
| 49 | + } |
| 50 | +} |
| 51 | + |
// resample fills dest with len(dest) values drawn uniformly at random, with
// replacement, from src, using r as the source of randomness. src must be
// non-empty whenever dest is non-empty, since r.Intn panics for a bound of 0.
func resample(r *rand.Rand, src, dest []float64) {
	n := len(src)
	for i := 0; i < len(dest); i++ {
		dest[i] = src[r.Intn(n)]
	}
}
| 59 | + |
// hash folds data into a single int64 by summing the IEEE 754 bit patterns of
// its elements with wrap-around arithmetic. The result does not depend on
// element order and is 0 for an empty slice.
func hash(data []float64) int64 {
	var sum uint64
	for i := range data {
		sum += math.Float64bits(data[i])
	}
	// Converting the wrapped unsigned sum yields the same bits as summing the
	// individual terms as int64.
	return int64(sum)
}
| 68 | + |
// median returns the middle element of values, or the mean of the two middle
// elements when the length is even. values must already be sorted in
// ascending order and must not be empty.
func median(values []float64) float64 {
	mid := len(values) / 2
	if len(values)%2 == 1 {
		return values[mid]
	}
	return (values[mid] + values[mid-1]) / 2
}
0 commit comments