Skip to content

Commit c14a687

Browse files
committed
asim: improve comments and test setup for sma
This commit improves comments and test setup for tests under sma.
1 parent e5c5ac0 commit c14a687

File tree

8 files changed

+110
-51
lines changed

8 files changed

+110
-51
lines changed

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/add_node.txt

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,16 @@ set_span_config
2727
[0,10000): num_replicas=3 num_voters=3
2828
----
2929

30+
# Speed up up-replication by bumping the rebalancing snapshot rate.
31+
setting rebalancing_snapshot_rate_mib=512
32+
----
3033

3134
# Since there are 300 replicas on s1 and the default RF=3, we expect the other
3235
# stores to be up-replicated to 300 replicas as well.
33-
eval duration=20m samples=1 seed=42 metrics=(replicas)
36+
eval duration=12m samples=1 seed=42 metrics=(replicas)
3437
----
3538
replicas#1: first: [s1=301, s2=0, s3=0] (stddev=141.89, mean=100.33, sum=301)
36-
replicas#1: last: [s1=301, s2=271, s3=267] (stddev=15.17, mean=279.67, sum=839)
39+
replicas#1: last: [s1=300, s2=300, s3=301] (stddev=0.47, mean=300.33, sum=901)
3740
replicas#1: thrash_pct: [s1=0%, s2=0%, s3=0%] (sum=0%)
38-
artifacts[sma-count]: 58e379d72ddc641d
39-
failed assertion sample 1
40-
balance stat=replicas threshold=(≤1.05) ticks=6
41-
max/mean=1.08 tick=0
42-
max/mean=1.08 tick=1
43-
max/mean=1.08 tick=2
44-
max/mean=1.08 tick=3
45-
max/mean=1.08 tick=4
46-
max/mean=1.08 tick=5
41+
artifacts[sma-count]: c99b633edc560571
4742
==========================

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/conformance.txt

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ gen_cluster nodes=5 region=(a,b) nodes_per_region=(2,3)
55
----
66

77
# Generate 10 ranges, one replica will be placed on each node initially.
8-
gen_ranges ranges=5 repl_factor=4
8+
gen_ranges ranges=10 repl_factor=4
99
----
1010

1111
# Update the span config to require a non-voter in the `region=a` locality.
@@ -26,9 +26,32 @@ set_span_config delay=5m
2626
assertion type=conformance under=0 over=0 unavailable=0 violating=0
2727
----
2828

29-
eval duration=10m cfgs=(sma-count,mma-only)
29+
eval duration=1m cfgs=(sma-count,mma-only) metrics=(replicas) full=true
3030
----
31-
artifacts[sma-count]: 79c6ee6ca7f1ab55
31+
replicas#1: first: [s1=9, s2=9, s3=9, s4=9, s5=8] (stddev=0.40, mean=8.80, sum=44)
32+
replicas#1: last: [s1=11, s2=10, s3=9, s4=5, s5=9] (stddev=2.04, mean=8.80, sum=44)
33+
replicas#1: thrash_pct: [s1=239%, s2=170%, s3=67%, s4=0%, s5=0%] (sum=476%)
34+
artifacts[sma-count]: a6973dd2f74e933f
3235
==========================
33-
artifacts[mma-only]: 79c6ee6ca7f1ab55
36+
replicas#1: first: [s1=9, s2=9, s3=9, s4=9, s5=8] (stddev=0.40, mean=8.80, sum=44)
37+
replicas#1: last: [s1=11, s2=11, s3=9, s4=5, s5=8] (stddev=2.23, mean=8.80, sum=44)
38+
replicas#1: thrash_pct: [s1=105%, s2=37%, s3=67%, s4=0%, s5=0%] (sum=209%)
39+
artifacts[mma-only]: 76036bbb784a6b13
40+
==========================
41+
Cluster Set Up
42+
n1(a,a_1,8vcpu): {s1:(256GiB)}
43+
n2(a,a_1,8vcpu): {s2:(256GiB)}
44+
n3(b,b_1,8vcpu): {s3:(256GiB)}
45+
n4(b,b_1,8vcpu): {s4:(256GiB)}
46+
n5(b,b_1,8vcpu): {s5:(256GiB)}
47+
Key Space
48+
[0,10000): 10(rf=4), 0MiB, [s1:(8,2*),s2:(8,2*),s3:(8,2*),s4:(8,2*),s5:(8,2*)]
49+
Event
50+
[0,10000): 3voters,1nonvoters [replicas:{2:region=a}{2:region=b}] [voters:{1:region=a}{2:region=b}]
51+
set LBRebalancingMode to 2
52+
[0,10000): 3voters,1nonvoters [replicas:{2:region=a}{2:region=b}] [voters:{2:region=a}{1:region=b}] at 11:05:00
53+
Workload Set Up
54+
empty
55+
Changed Settings
56+
empty
3457
==========================

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/decommission_conformance.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ set_liveness node=4 liveness=decommissioning delay=5m
2626
assertion type=conformance under=0 over=0 unavailable=0 violating=0
2727
----
2828

29-
eval duration=20m cfgs=(sma-count,mma-only)
29+
eval duration=6m cfgs=(sma-count,mma-only)
3030
----
31-
artifacts[sma-count]: 8e9d6ada04cb9561
31+
artifacts[sma-count]: b29d203c32d8eff1
3232
==========================
33-
artifacts[mma-only]: 8d6fbe46b5c14575
33+
artifacts[mma-only]: 7cb8843ee1554cf5
3434
==========================
Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# This test verifies that the allocator correctly handles disk fullness by
2+
# shedding replicas from stores that exceed the storage capacity threshold. It
3+
# sets up a cluster where one store (s5) has significantly less capacity (100 GiB)
4+
# than others (512 GiB), causing it to hit the disk fullness threshold and
5+
# continuously shed replicas to keep disk usage below 95%.
16
skip_under_ci
27
----
38

@@ -20,29 +25,29 @@ set_capacity store=5 capacity=107374182400
2025
# We will repeatedly hit the disk fullness threshold which causes shedding
2126
# replicas on store 5. We should see s5 hovering right around 92.5-95%
2227
# (the storage capacity threshold value).
23-
eval duration=30m seed=42 metrics=(replicas,disk_fraction_used) cfgs=(sma-count,mma-only,mma-count)
28+
eval duration=20m seed=42 metrics=(replicas,disk_fraction_used) cfgs=(sma-count,mma-only,mma-count)
2429
----
2530
disk_fraction_used#1: first: [s1=0.20, s2=0.20, s3=0.20, s4=0.20, s5=1.05] (stddev=0.34, mean=0.37, sum=2)
26-
disk_fraction_used#1: last: [s1=0.30, s2=0.30, s3=0.30, s4=0.31, s5=0.95] (stddev=0.26, mean=0.43, sum=2)
27-
disk_fraction_used#1: thrash_pct: [s1=30%, s2=31%, s3=32%, s4=30%, s5=124%] (sum=246%)
31+
disk_fraction_used#1: last: [s1=0.27, s2=0.27, s3=0.27, s4=0.27, s5=0.92] (stddev=0.26, mean=0.40, sum=2)
32+
disk_fraction_used#1: thrash_pct: [s1=19%, s2=20%, s3=20%, s4=19%, s5=74%] (sum=153%)
2833
replicas#1: first: [s1=300, s2=300, s3=300, s4=300, s5=300] (stddev=0.00, mean=300.00, sum=1500)
29-
replicas#1: last: [s1=325, s2=324, s3=322, s4=329, s5=200] (stddev=50.05, mean=300.00, sum=1500)
30-
replicas#1: thrash_pct: [s1=227%, s2=237%, s3=240%, s4=228%, s5=91%] (sum=1023%)
31-
artifacts[sma-count]: e501ccc056a9e929
34+
replicas#1: last: [s1=324, s2=322, s3=323, s4=319, s5=212] (stddev=44.03, mean=300.00, sum=1500)
35+
replicas#1: thrash_pct: [s1=179%, s2=195%, s3=190%, s4=185%, s5=56%] (sum=805%)
36+
artifacts[sma-count]: 6a137567c06e77bf
3237
==========================
3338
disk_fraction_used#1: first: [s1=0.20, s2=0.20, s3=0.20, s4=0.20, s5=1.05] (stddev=0.34, mean=0.37, sum=2)
34-
disk_fraction_used#1: last: [s1=0.31, s2=0.31, s3=0.31, s4=0.29, s5=0.91] (stddev=0.24, mean=0.43, sum=2)
35-
disk_fraction_used#1: thrash_pct: [s1=2%, s2=1%, s3=1%, s4=0%, s5=67%] (sum=71%)
39+
disk_fraction_used#1: last: [s1=0.28, s2=0.28, s3=0.28, s4=0.26, s5=0.90] (stddev=0.25, mean=0.40, sum=2)
40+
disk_fraction_used#1: thrash_pct: [s1=1%, s2=0%, s3=1%, s4=0%, s5=47%] (sum=48%)
3641
replicas#1: first: [s1=300, s2=300, s3=300, s4=300, s5=300] (stddev=0.00, mean=300.00, sum=1500)
37-
replicas#1: last: [s1=331, s2=330, s3=331, s4=315, s5=193] (stddev=53.84, mean=300.00, sum=1500)
38-
replicas#1: thrash_pct: [s1=14%, s2=4%, s3=9%, s4=0%, s5=0%] (sum=26%)
39-
artifacts[mma-only]: 2f2f9597d62f93d7
42+
replicas#1: last: [s1=329, s2=330, s3=329, s4=305, s5=207] (stddev=47.45, mean=300.00, sum=1500)
43+
replicas#1: thrash_pct: [s1=4%, s2=0%, s3=6%, s4=0%, s5=0%] (sum=11%)
44+
artifacts[mma-only]: 1487ef171680ff9f
4045
==========================
4146
disk_fraction_used#1: first: [s1=0.20, s2=0.20, s3=0.20, s4=0.20, s5=1.05] (stddev=0.34, mean=0.37, sum=2)
42-
disk_fraction_used#1: last: [s1=0.30, s2=0.30, s3=0.31, s4=0.30, s5=0.90] (stddev=0.24, mean=0.42, sum=2)
43-
disk_fraction_used#1: thrash_pct: [s1=37%, s2=35%, s3=36%, s4=35%, s5=175%] (sum=318%)
47+
disk_fraction_used#1: last: [s1=0.27, s2=0.27, s3=0.27, s4=0.27, s5=0.92] (stddev=0.26, mean=0.40, sum=2)
48+
disk_fraction_used#1: thrash_pct: [s1=23%, s2=23%, s3=24%, s4=24%, s5=129%] (sum=223%)
4449
replicas#1: first: [s1=300, s2=300, s3=300, s4=300, s5=300] (stddev=0.00, mean=300.00, sum=1500)
45-
replicas#1: last: [s1=326, s2=328, s3=329, s4=326, s5=191] (stddev=54.51, mean=300.00, sum=1500)
46-
replicas#1: thrash_pct: [s1=277%, s2=263%, s3=272%, s4=260%, s5=176%] (sum=1247%)
47-
artifacts[mma-count]: 3118ca7a9543fdd3
50+
replicas#1: last: [s1=323, s2=323, s3=324, s4=317, s5=213] (stddev=43.57, mean=300.00, sum=1500)
51+
replicas#1: thrash_pct: [s1=220%, s2=220%, s3=236%, s4=231%, s5=171%] (sum=1077%)
52+
artifacts[mma-count]: 4d36e982fd0dff3e
4853
==========================

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/lease_preferences.txt

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
# This test demonstrates setting and asserting on lease preferences. The
2-
# cluster topology is identical to example_conformance.
1+
# This test verifies lease preference handling in a 5-node cluster (2 in region a, 3 in region b).
2+
# Tests three configurations:
3+
# 1. Satisfiable: voters in region b, lease preference for region b (no violations)
4+
# 2. Less-preferred: voters in region b, preference for region a then b (1 lease-less-preferred)
5+
# 3. Impossible: voters in region b, preference for region a (1 lease-violating)
6+
#
7+
# Expected: 1 lease-violating and 1 lease-less-preferred violation.
38
gen_cluster nodes=5 region=(a,b) nodes_per_region=(2,3)
49
----
510

@@ -28,9 +33,21 @@ set_span_config
2833
assertion type=conformance lease-violating=1 lease-less-preferred=1
2934
----
3035

31-
eval duration=10m cfgs=(sma-count,mma-only)
36+
eval duration=1m cfgs=(sma-count,mma-only) metrics=(replicas,leases)
3237
----
33-
artifacts[sma-count]: 35051da9f3ec651d
38+
leases#1: first: [s1=4, s2=0, s3=0, s4=0, s5=0] (stddev=1.60, mean=0.80, sum=4)
39+
leases#1: last: [s1=1, s2=0, s3=0, s4=2, s5=1] (stddev=0.75, mean=0.80, sum=4)
40+
leases#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%] (sum=0%)
41+
replicas#1: first: [s1=4, s2=4, s3=4, s4=0, s5=0] (stddev=1.96, mean=2.40, sum=12)
42+
replicas#1: last: [s1=1, s2=1, s3=4, s4=3, s5=3] (stddev=1.20, mean=2.40, sum=12)
43+
replicas#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%] (sum=0%)
44+
artifacts[sma-count]: eaa5c7254461b39d
3445
==========================
35-
artifacts[mma-only]: 339579f79e6b26fd
46+
leases#1: first: [s1=4, s2=0, s3=0, s4=0, s5=0] (stddev=1.60, mean=0.80, sum=4)
47+
leases#1: last: [s1=1, s2=0, s3=0, s4=1, s5=2] (stddev=0.75, mean=0.80, sum=4)
48+
leases#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%] (sum=0%)
49+
replicas#1: first: [s1=4, s2=4, s3=4, s4=0, s5=0] (stddev=1.96, mean=2.40, sum=12)
50+
replicas#1: last: [s1=1, s2=1, s3=4, s4=3, s5=3] (stddev=1.20, mean=2.40, sum=12)
51+
replicas#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%] (sum=0%)
52+
artifacts[mma-only]: 2216e9f740949e3d
3653
==========================

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/liveness.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# This test verifies node liveness handling in a 7-node cluster with 700 ranges (RF=3).
2+
# Node 7 is dead initially, node 6 becomes decommissioning after 3 minutes.
3+
# Expected: Dead/decommissioning nodes lose all replicas and leases.
14
skip_under_ci
25
----
36

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/rebalancing.txt

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# This test verifies that the allocator can rebalance QPS load across a 7-node
2+
# cluster. mma-only fails here since it doesn't balance based on leases.
13
skip_under_ci
24
----
35

@@ -16,18 +18,11 @@ gen_ranges ranges=7 placement_type=skewed
1618
gen_load rate=7000 rw_ratio=0.95 access_skew=false min_block=128 max_block=256
1719
----
1820

19-
# Add two assertions, the first is a balance assertion. The balance assertion
20-
# requires that when simulation is evaluated that during last 6 ticks (60
21-
# seconds) the max/mean QPS of the cluster does not exceed 1.15.
21+
# Add two assertions on the cluster's QPS: a balance assertion and a steady-state assertion.
2222
assertion stat=qps type=balance ticks=6 upper_bound=1.15
2323
----
2424
asserting: max_{stores}(qps)/mean_{stores}(qps) ≤ 1.15 at each of last 6 ticks
2525

26-
# The second is a steady state assertion. The steady state assertion requires
27-
# that during the last 6 ticks (60 seconds), the value of QPS per-store doesn't
28-
# increase or decrease by more than 5% of the mean. This type of assertion is
29-
# useful when a stat is balanced but not necessarily stable.
30-
#
3126
# TODO(tbg): at this point, six ticks is 3s, not 60s. Update assertion API
3227
# to take a duration, not ticks.
3328
assertion stat=qps type=steady ticks=6 upper_bound=0.05
@@ -39,7 +34,7 @@ asserting: |qps(t)/mean_{T}(qps) - 1| ≤ 0.05 ∀ t∈T and each store (T=last
3934
# Following the evaluation, the samples are checked individually against the
4035
# existing assertions, added above. If any assertion fails, the reason is
4136
# printed. If no assertions fail, then OK is printed.
42-
eval duration=3m samples=2 seed=42 metrics=(qps,replica_moves) cfgs=(sma-count,mma-only)
37+
eval duration=3m samples=2 seed=42 metrics=(qps,replica_moves) cfgs=(sma-count,mma-only) full=true
4338
----
4439
qps#1: last: [s1=996, s2=1001, s3=999, s4=994, s5=1001, s6=1002, s7=1007] (stddev=3.93, mean=1000.00, sum=7000)
4540
qps#1: thrash_pct: [s1=12%, s2=9%, s3=4%, s4=8%, s5=5%, s6=6%, s7=4%] (sum=48%)
@@ -73,10 +68,26 @@ failed assertion sample 2
7368
max/mean=4.00 tick=4
7469
max/mean=4.00 tick=5
7570
==========================
71+
Cluster Set Up
72+
n1(AU_EAST,AU_EAST_1,8vcpu): {s1:(256GiB)}
73+
n2(AU_EAST,AU_EAST_1,8vcpu): {s2:(256GiB)}
74+
n3(AU_EAST,AU_EAST_1,8vcpu): {s3:(256GiB)}
75+
n4(AU_EAST,AU_EAST_1,8vcpu): {s4:(256GiB)}
76+
n5(AU_EAST,AU_EAST_1,8vcpu): {s5:(256GiB)}
77+
n6(AU_EAST,AU_EAST_1,8vcpu): {s6:(256GiB)}
78+
n7(AU_EAST,AU_EAST_1,8vcpu): {s7:(256GiB)}
79+
Key Space
80+
[0,10000): 7(rf=3), 0MiB, [s1:(7,4*),s2:(6,2*),s3:(3,0*),s4:(2,0*),s5:(1,0*),s6:(1,1*),s7:(1,0*)]
81+
Event
82+
set LBRebalancingMode to 2
83+
Workload Set Up
84+
[0,10000): 95%r large-block [128-256B/op, 7000ops/s]
85+
Changed Settings
86+
empty
87+
==========================
7688

77-
# The cluster settings and simulation variables can be modified to examine how
78-
# allocation behaves during uncommon scenarios. Update the gossip delay to be
79-
# unreasonably long (default 500ms).
89+
# Update the gossip delay to be unreasonably long (default 500ms) to assess
90+
# uncommon scenarios.
8091
setting gossip_delay=20s
8192
----
8293

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/splitting.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# This test verifies load-based splitting behavior in a single-store cluster.
2+
# It tests both uniform and zipfian access patterns to ensure the split
3+
# algorithm works correctly under different load distributions. With a 10k QPS
4+
# load and 2.5k QPS split threshold, we expect approximately 4 splits for
5+
# uniform access and more splits for zipfian due to hotspotting.
16
skip_under_ci
27
----
38

0 commit comments

Comments
 (0)