Commit 2c7aafc

roachtest: adjust failover tests to changed liveness rangeID
As of #155554, the liveness range has rangeID 3, not 2. The tests are updated to avoid relying on a particular ID: instead, we match on the pretty-printed start key of the liveness range, /System/NodeLiveness.

Closes #156450.
Closes #156449.
Closes #156448.

Epic: none
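For illustration, the idea behind the new predicates is to identify the liveness range by its pretty-printed start key rather than by a hard-coded range ID. Below is a minimal, standalone sketch of that lookup. It is not part of this commit: the connection string, the lib/pq driver, and the assumption that SHOW CLUSTER RANGES exposes pretty-printed start_key and range_id columns are all illustrative.

// liveness_lookup.go: hypothetical sketch of finding the liveness range by
// start key instead of by a hard-coded range ID. Not part of this commit.
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // CockroachDB speaks the Postgres wire protocol.
)

func main() {
	ctx := context.Background()
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Same predicate the tests now use: the liveness range is identified by
	// its pretty-printed start key, whatever range ID it currently has.
	const predOnlyLiveness = `start_key = '/System/NodeLiveness'`

	var rangeID int64
	row := db.QueryRowContext(ctx,
		`SELECT range_id FROM [SHOW CLUSTER RANGES] WHERE `+predOnlyLiveness)
	if err := row.Scan(&rangeID); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("liveness range currently has ID %d\n", rangeID)
}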
1 parent 21b75ac commit 2c7aafc

File tree: 1 file changed (+20 −13 lines)


pkg/cmd/roachtest/tests/failover.go

Lines changed: 20 additions & 13 deletions
@@ -56,6 +56,13 @@ var failoverAggregateFunction = func(test string, histogram *roachtestutil.Histo
 	}, nil
 }
 
+// NB: the liveness range's ID has changed in the past. It used to be 2, now
+// it's 3 (#155554). Using the start key avoids relying on a specific rangeID.
+const (
+	predAllButLiveness = `start_key != '/System/NodeLiveness'`
+	predOnlyLiveness   = `start_key = '/System/NodeLiveness'`
+)
+
 // registerFailover registers a set of failover benchmarks. These tests
 // benchmark the maximum unavailability experienced by clients during various
 // node failures, and exports them for roachperf graphing. They do not make any
@@ -752,7 +759,7 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster
 	// all nodes regardless.
 	relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3, 4}, []int{5, 6, 7})
 	relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{5, 6, 7}, []int{1, 2, 3, 4})
-	relocateRanges(t, ctx, conn, `range_id != 2`, []int{4}, []int{1, 2, 3})
+	relocateRanges(t, ctx, conn, predAllButLiveness, []int{4}, []int{1, 2, 3})
 
 	// Run workload on n8 using n1-n3 as gateways (not partitioned) until test
 	// ends (context cancels).
@@ -781,8 +788,8 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster
 		// them to where they should be.
 		relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3, 4}, []int{5, 6, 7})
 		relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{node}, []int{1, 2, 3})
-		relocateRanges(t, ctx, conn, `range_id = 2`, []int{5, 6, 7}, []int{1, 2, 3, 4})
-		relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+		relocateRanges(t, ctx, conn, predOnlyLiveness, []int{5, 6, 7}, []int{1, 2, 3, 4})
+		relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 
 		// Randomly sleep up to the lease renewal interval, to vary the time
 		// between the last lease renewal and the failure.
@@ -976,10 +983,10 @@ func runFailoverLiveness(
 	// do it ourselves. Precreating the database/range and moving it to the
 	// correct nodes first is not sufficient, since workload will spread the
 	// ranges across all nodes regardless.
-	relocateRanges(t, ctx, conn, `range_id != 2`, []int{4}, []int{1, 2, 3})
+	relocateRanges(t, ctx, conn, predAllButLiveness, []int{4}, []int{1, 2, 3})
 
-	// We also make sure the lease is located on n4.
-	relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+	// We also make sure the liveness lease is located on n4.
+	relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 
 	// Run workload on n5 via n1-n3 gateways until test ends (context cancels).
 	t.L().Printf("running workload")
@@ -1002,8 +1009,8 @@ func runFailoverLiveness(
 
 		// Ranges and leases may occasionally escape their constraints. Move them
 		// to where they should be.
-		relocateRanges(t, ctx, conn, `range_id != 2`, []int{4}, []int{1, 2, 3})
-		relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+		relocateRanges(t, ctx, conn, predAllButLiveness, []int{4}, []int{1, 2, 3})
+		relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 
 		// Randomly sleep up to the lease renewal interval, to vary the time
 		// between the last lease renewal and the failure.
@@ -1018,7 +1025,7 @@ func runFailoverLiveness(
 
 		t.L().Printf("recovering n%d (%s)", 4, failer)
 		failer.Recover(ctx, 4)
-		relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+		relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 	}
 
 	sleepFor(ctx, t, time.Minute) // let cluster recover
@@ -1089,9 +1096,9 @@ func runFailoverSystemNonLiveness(
 	// n1-n3, so we do it ourselves. Precreating the database/range and moving it
 	// to the correct nodes first is not sufficient, since workload will spread
 	// the ranges across all nodes regardless.
-	relocateRanges(t, ctx, conn, `database_name = 'kv' OR range_id = 2`,
+	relocateRanges(t, ctx, conn, `database_name = 'kv' OR `+predOnlyLiveness,
 		[]int{4, 5, 6}, []int{1, 2, 3})
-	relocateRanges(t, ctx, conn, `database_name != 'kv' AND range_id != 2`,
+	relocateRanges(t, ctx, conn, `database_name != 'kv' AND `+predAllButLiveness,
 		[]int{1, 2, 3}, []int{4, 5, 6})
 
 	// Run workload on n7 via n1-n3 as gateways until test ends (context cancels).
@@ -1116,9 +1123,9 @@ func runFailoverSystemNonLiveness(
 
 		// Ranges may occasionally escape their constraints. Move them
 		// to where they should be.
-		relocateRanges(t, ctx, conn, `database_name != 'kv' AND range_id != 2`,
+		relocateRanges(t, ctx, conn, `database_name != 'kv' AND `+predAllButLiveness,
 			[]int{1, 2, 3}, []int{4, 5, 6})
-		relocateRanges(t, ctx, conn, `database_name = 'kv' OR range_id = 2`,
+		relocateRanges(t, ctx, conn, `database_name = 'kv' OR `+predOnlyLiveness,
			[]int{4, 5, 6}, []int{1, 2, 3})
 
 		// Randomly sleep up to the lease renewal interval, to vary the time
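
For readers unfamiliar with the helpers appearing in this diff: relocateRanges and relocateLeases move replicas and leases for every range matching the given SQL predicate, which is why swapping the predicate string is enough to stop depending on the liveness range's ID. The sketch below is a hypothetical stand-in, not the actual failover.go helpers; it assumes the ALTER RANGE RELOCATE LEASE TO ... FOR <subquery> form and the same SHOW CLUSTER RANGES columns the predicates reference.

// relocate_sketch.go: hypothetical sketch only; the real relocateLeases helper
// in failover.go may be implemented differently.
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // CockroachDB speaks the Postgres wire protocol.
)

// relocateLeaseByPredicate moves the lease of every range matching the given
// SQL predicate to the given store.
func relocateLeaseByPredicate(ctx context.Context, db *sql.DB, predicate string, to int) error {
	stmt := fmt.Sprintf(
		`ALTER RANGE RELOCATE LEASE TO %d FOR SELECT range_id FROM [SHOW CLUSTER RANGES] WHERE %s`,
		to, predicate)
	_, err := db.ExecContext(ctx, stmt)
	return err
}

func main() {
	ctx := context.Background()
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Pin the liveness lease to store 4, whatever range ID the range has,
	// mirroring the predOnlyLiveness predicate introduced by this commit.
	if err := relocateLeaseByPredicate(ctx, db, `start_key = '/System/NodeLiveness'`, 4); err != nil {
		log.Fatal(err)
	}
}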
