|
| 1 | +# This test reproduces the issue observed in #152604, where |
| 2 | +# a zone config change prefers removing live replicas despite |
| 3 | +# the presence of recently down replicas. This leads to loss |
| 4 | +# of quorum. |
| 5 | +# |
| 6 | +# See: |
| 7 | +# - https://github.com/cockroachdb/cockroach/issues/152604 |
| 8 | +# - https://github.com/cockroachdb/cockroach/issues/155734 |
| 9 | + |
| 10 | +gen_cluster nodes=5 |
| 11 | +---- |
| 12 | + |
| 13 | +# Place ranges, replicated across all five nodes. |
| 14 | +gen_ranges ranges=100 repl_factor=5 min_key=1 max_key=10000 |
| 15 | +---- |
| 16 | + |
| 17 | +# Mark n4 and n5 as NodeLivenessStatus_UNAVAILABLE, which is the status |
| 18 | +# stores have when they are down, but not for long enough to be marked |
| 19 | +# as dead. No range loses quorum as a result of this, since three of |
| 20 | +# each range's five replicas are still live. |
| 21 | +set_liveness node=4 liveness=unavailable |
| 22 | +---- |
| 23 | + |
| 24 | +set_liveness node=5 liveness=unavailable |
| 25 | +---- |
| 26 | + |
| 27 | +# Trigger down-replication to three replicas. |
| 28 | + |
| 29 | +set_span_config |
| 30 | +[0,10000): num_replicas=3 num_voters=3 |
| 31 | +---- |
| 32 | + |
| 33 | +# Note how s4 and s5 retain their replicas, while replicas are being |
| 34 | +# removed from live nodes s1-s3. This leads to a loss of quorum that |
| 35 | +# isn't immediately obvious since this is an asim test. The logs show |
| 36 | +# that the allocator itself realizes this when trying to make the |
| 37 | +# next change, but by then it is too late. |
| 38 | +# |
| 39 | +# In the real world, as of #156464, these dangerous replication changes |
| 40 | +# would be blocked, but it is far from ideal that they are attempted |
| 41 | +# in the first place. |
| 42 | +eval duration=10m cfgs=(sma-count) metrics=(replicas) |
| 43 | +---- |
| 44 | +replicas#1: first: [s1=101, s2=101, s3=101, s4=101, s5=101] (stddev=0.00, mean=101.00, sum=505) |
| 45 | +replicas#1: last: [s1=68, s2=67, s3=68, s4=101, s5=101] (stddev=16.33, mean=81.00, sum=405) |
| 46 | +replicas#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%] (sum=0%) |
| 47 | +artifacts[sma-count]: ff4c6613afd4b749 |
| 48 | +========================== |
0 commit comments