Skip to content

Commit cb85168

Browse files
committed
asim: improve skewedDistribution and its testing
Previously, we fixed the skewedDistribution function by generating weights that decrease by a factor of 1/2 for each subsequent store, and then normalizing them so that they sum up to 1. The result represented a skewed replica weight distribution across stores. However, this required two passes: one to generate and sum the weights, and another to normalize them. This commit improves the logic by using the finite sum of a geometric series to pre-compute the total, allowing normalization in a single pass. It also improves test coverage by adding an echotest that asserts the expected output of the helper functions.
1 parent 9e98ae8 commit cb85168

File tree

6 files changed

+118
-72
lines changed

6 files changed

+118
-72
lines changed

pkg/kv/kvserver/asim/state/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ go_test(
7272
"//pkg/kv/kvserver/load",
7373
"//pkg/roachpb",
7474
"//pkg/testutils/datapathutils",
75+
"//pkg/testutils/echotest",
7576
"//pkg/util/hlc",
77+
"//pkg/util/leaktest",
7678
"@com_github_cockroachdb_datadriven//:datadriven",
7779
"@com_github_stretchr_testify//require",
7880
],

pkg/kv/kvserver/asim/state/new_state.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,21 +39,21 @@ func evenDistribution(numOfStores int) []float64 {
3939
return distribution
4040
}
4141

42-
func skewedDistribution(numOfStores, k int) []float64 {
42+
func skewedDistribution(numOfStores int) []float64 {
4343
weights := make([]float64, numOfStores)
44-
var total float64
45-
// Compute weights.
44+
// Sum of weights. Since weights computed won't add up to 1, we normalize it
45+
// by dividing by the sum of weights. Sum is pre-computed here using the partial
46+
// sum formula of a geometric series: sum of 2^(-i) from i = 0 to k gives
47+
// 2-2^(-k).
48+
// Example: given 3 stores, cur(weights before normalization) is 1, 0.5, 0.25,
49+
// sum is 2.0-2^(-2) = 1.75. After normalization, weights are 0.57, 0.29,
50+
// 0.14.
51+
sum := 2.0 - math.Pow(2, float64(-(numOfStores-1)))
52+
cur := float64(1)
4653
for i := 0; i < numOfStores; i++ {
47-
// weight[0] = 2^(n-1)
48-
// weight[1] = 2^(n-2)
49-
// ...
50-
// weight[n-1] = 2^0
51-
weights[i] = math.Pow(2, float64(numOfStores-i-1))
52-
total += weights[i]
53-
}
54-
// Normalize to get ratios.
55-
for i := 0; i < numOfStores; i++ {
56-
weights[i] /= total
54+
// cur is 1, 0.5, 0.25, ...
55+
weights[i] = cur / sum
56+
cur /= 2
5757
}
5858
return weights
5959
}
@@ -285,7 +285,7 @@ func makeStoreList(stores int) []StoreID {
285285
func RangesInfoSkewedDistribution(
286286
stores int, ranges int, minKey int64, maxKey int64, replicationFactor int, rangeSize int64,
287287
) RangesInfo {
288-
distribution := skewedDistribution(stores, ranges)
288+
distribution := skewedDistribution(stores)
289289
storeList := makeStoreList(stores)
290290

291291
return RangesInfoWithDistribution(

pkg/kv/kvserver/asim/state/state_test.go

Lines changed: 75 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
package state
77

88
import (
9+
"fmt"
910
"math/rand"
11+
"strings"
1012
"testing"
1113
"time"
1214

@@ -15,6 +17,9 @@ import (
1517
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
1618
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/load"
1719
"github.com/cockroachdb/cockroach/pkg/roachpb"
20+
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
21+
"github.com/cockroachdb/cockroach/pkg/testutils/echotest"
22+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
1823
"github.com/stretchr/testify/require"
1924
)
2025

@@ -414,53 +419,6 @@ func TestOrderedStateLists(t *testing.T) {
414419
s = NewStateWeightedRandDistribution(defaultSeed, []float64{0.0, 0.1, 0.3, 0.6}, 1400, 10000, 3, settings)
415420
assertListsOrdered(s)
416421
}
417-
func TestSkewedDistribution(t *testing.T) {
418-
rangeInfo := RangesInfoSkewedDistribution(
419-
6 /*stores*/, 100 /*ranges*/, 1 /*minKey*/, 10000 /*maxKey*/, 3 /*replicationFactor*/, 10000 /*rangeSize*/)
420-
expectedStoreReplicas := map[roachpb.StoreID]int{
421-
1: 100,
422-
2: 87,
423-
3: 49,
424-
4: 30,
425-
5: 20,
426-
6: 14,
427-
}
428-
429-
totalReplicas := 0
430-
stores := map[roachpb.StoreID]int{}
431-
for _, rng := range rangeInfo {
432-
for _, repl := range rng.Descriptor.InternalReplicas {
433-
stores[repl.StoreID]++
434-
totalReplicas++
435-
}
436-
}
437-
require.Equal(t, 300, totalReplicas)
438-
require.Equal(t, expectedStoreReplicas, stores)
439-
require.Equal(t, 6, len(stores))
440-
}
441-
func TestEvenDistribution(t *testing.T) {
442-
rangeInfo := RangesInfoEvenDistribution(
443-
6 /*stores*/, 100 /*ranges*/, 1 /*minKey*/, 10000 /*maxKey*/, 3 /*replicationFactor*/, 10000 /*rangeSize*/)
444-
expectedStoreReplicas := map[roachpb.StoreID]int{
445-
1: 50,
446-
2: 50,
447-
3: 50,
448-
4: 50,
449-
5: 50,
450-
6: 50,
451-
}
452-
totalReplicas := 0
453-
stores := map[roachpb.StoreID]int{}
454-
for _, rng := range rangeInfo {
455-
for _, repl := range rng.Descriptor.InternalReplicas {
456-
stores[repl.StoreID]++
457-
totalReplicas++
458-
}
459-
}
460-
require.Equal(t, 300, totalReplicas)
461-
require.Equal(t, expectedStoreReplicas, stores)
462-
require.Equal(t, 6, len(stores))
463-
}
464422

465423
// TestNewStateDeterministic asserts that the state returned from the new state
466424
// utility functions is deterministic.
@@ -834,3 +792,73 @@ func TestCapacityOverride(t *testing.T) {
834792
// reason.
835793
require.Equal(t, 500.0, capacity.WritesPerSecond)
836794
}
795+
796+
// TestDistribution tests the distribution helper functions. The invariants
797+
// are that each distribution sums to 1.0 and that it matches the
797+
// expected output.
799+
func TestDistribution(t *testing.T) {
800+
defer leaktest.AfterTest(t)()
801+
802+
sum := func(values []float64) float64 {
803+
total := 0.0
804+
for _, v := range values {
805+
total += v
806+
}
807+
return total
808+
}
809+
810+
const seed = 42
811+
randSource := rand.New(rand.NewSource(seed))
812+
813+
testCases := []struct {
814+
numStores int
815+
fns []struct {
816+
name string
817+
fn func() []float64
818+
}
819+
}{
820+
{
821+
numStores: 3,
822+
fns: []struct {
823+
name string
824+
fn func() []float64
825+
}{
826+
{name: "even", fn: func() []float64 { return evenDistribution(3) }},
827+
{name: "skewed", fn: func() []float64 { return skewedDistribution(3) }},
828+
{name: "exact", fn: func() []float64 { return exactDistribution([]int{1, 1, 1}) }},
829+
{name: "weighted_rand", fn: func() []float64 {
830+
return weightedRandDistribution(randSource, []float64{0.6, 0.2, 0.2})
831+
}},
832+
{name: "rand", fn: func() []float64 { return randDistribution(randSource, 3) }},
833+
},
834+
},
835+
{
836+
numStores: 10,
837+
fns: []struct {
838+
name string
839+
fn func() []float64
840+
}{
841+
{name: "even", fn: func() []float64 { return evenDistribution(10) }},
842+
{name: "skewed", fn: func() []float64 { return skewedDistribution(10) }},
843+
{name: "exact", fn: func() []float64 { return exactDistribution([]int{2, 2, 2, 2, 2, 1, 1, 1, 1, 1}) }},
844+
{name: "weighted_rand", fn: func() []float64 {
845+
return weightedRandDistribution(randSource, []float64{0.5, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05})
846+
}},
847+
{name: "rand", fn: func() []float64 { return randDistribution(randSource, 10) }},
848+
},
849+
},
850+
}
851+
w := echotest.NewWalker(t, datapathutils.TestDataPath(t, "echotest"))
852+
for _, testCase := range testCases {
853+
t.Run(fmt.Sprintf("%d_stores", testCase.numStores), func(t *testing.T) {
854+
t.Run("distribution", w.Run(t, fmt.Sprintf("%d_stores", testCase.numStores), func(t *testing.T) string {
855+
var str strings.Builder
856+
for _, fn := range testCase.fns {
857+
dist := fn.fn()
858+
str.WriteString(fmt.Sprintf("[%s: %.2f, sum: %.2f]\n", fn.name, dist, sum(dist)))
859+
}
860+
return str.String()
861+
}))
862+
})
863+
}
864+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# This test tests the distribution helper functions. The invariants are that the
2+
# distributions sum to 1.0 and that the distribution is expected. The input is
3+
# even, skewed, rand, exact: store replica count (2,2,2,2,2,1,1,1,1,1), and
4+
# weighted_rand: store replica ratio
5+
# (0.5,0.1,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.05). The output is the
6+
# distribution for each helper function, which represents the replica weight
7+
# distribution across 10 stores.
8+
echo
9+
----
10+
[even: [0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10], sum: 1.00]
11+
[skewed: [0.50 0.25 0.13 0.06 0.03 0.02 0.01 0.00 0.00 0.00], sum: 1.00]
12+
[exact: [0.13 0.13 0.13 0.13 0.13 0.07 0.07 0.07 0.07 0.07], sum: 1.00]
13+
[weighted_rand: [0.50 0.00 0.00 0.10 0.20 0.00 0.00 0.00 0.10 0.10], sum: 1.00]
14+
[rand: [0.11 0.09 0.02 0.04 0.20 0.15 0.20 0.04 0.04 0.11], sum: 1.00]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# This test tests the distribution helper functions. The invariants are that the
2+
# distributions sum to 1.0 and that the distribution is expected. The input is
3+
# even, skewed, rand, exact: store replica count (1,1,1), and weighted_rand:
4+
# store replica ratio (0.6,0.2,0.2). The output is the distribution for each
5+
# helper function, which represents the replica weight distribution across 3
6+
# stores.
7+
echo
8+
----
9+
[even: [0.33 0.33 0.33], sum: 1.00]
10+
[skewed: [0.57 0.29 0.14], sum: 1.00]
11+
[exact: [0.33 0.33 0.33], sum: 1.00]
12+
[weighted_rand: [0.70 0.20 0.10], sum: 1.00]
13+
[rand: [0.43 0.33 0.24], sum: 1.00]

pkg/testutils/echotest/echotest.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,6 @@ type Walker struct {
6565
// all files (i.e. the expected outputs, one per test case) are kept.
6666
//
6767
// Model usage:
68-
//
69-
// w := NewWalker(t, datapathutils.TestDataPath(t))
70-
// for _, test := range []struct{ name string }{
71-
// {name: "foo"},
72-
// {name: "bar"},
73-
// } {
74-
// t.Run(test.name, w.Run(t, test.name, func(t *testing.T, path string) {
75-
// Require(t, fmt.Sprintf("hello, %s", test.name), path)
76-
// }))
77-
// }
78-
//
7968
// w := NewWalker(t, datapathutils.TestDataPath(t))
8069
//
8170
// for _, test := range []struct{ name string }{

0 commit comments

Comments
 (0)