@@ -56,6 +56,13 @@ var failoverAggregateFunction = func(test string, histogram *roachtestutil.Histo
 	}, nil
 }
 
+// NB: the liveness range's ID has changed in the past. It used to be 2, now
+// it's 3 (#155554). Using the start key avoids relying on a specific rangeID.
+const (
+	predAllButLiveness = `start_key != '/System/NodeLiveness'`
+	predOnlyLiveness   = `start_key = '/System/NodeLiveness'`
+)
+
 // registerFailover registers a set of failover benchmarks. These tests
 // benchmark the maximum unavailability experienced by clients during various
 // node failures, and exports them for roachperf graphing. They do not make any
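For context on the new constants: `/System/NodeLiveness` is the start key of the liveness range, which is stable, while the range ID is not. A minimal illustrative check, not part of the change; it assumes a `*gosql.DB` handle and that `SHOW CLUSTER RANGES` reports `start_key` as a pretty-printed string, which is what these predicates compare against:

```go
// Hypothetical sanity check: resolve the liveness range's current ID by its
// start key instead of hardcoding range_id = 2 (or 3).
var rangeID int
err := conn.QueryRowContext(ctx,
	`SELECT range_id FROM [SHOW CLUSTER RANGES] WHERE `+predOnlyLiveness,
).Scan(&rangeID)
if err != nil {
	t.Fatal(err)
}
t.L().Printf("liveness range is r%d", rangeID) // r2 historically, r3 after #155554
```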
@@ -752,7 +759,7 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster
 	// all nodes regardless.
 	relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3, 4}, []int{5, 6, 7})
 	relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{5, 6, 7}, []int{1, 2, 3, 4})
-	relocateRanges(t, ctx, conn, `range_id != 2`, []int{4}, []int{1, 2, 3})
+	relocateRanges(t, ctx, conn, predAllButLiveness, []int{4}, []int{1, 2, 3})
 
 	// Run workload on n8 using n1-n3 as gateways (not partitioned) until test
 	// ends (context cancels).
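`relocateRanges` is an existing helper elsewhere in this file; the diff only changes its predicate arguments. As a rough sketch of the shape such a helper could take (an assumption for illustration, not the file's actual implementation), the predicate is a SQL fragment spliced into a `WHERE` clause, with replicas moved via `ALTER RANGE RELOCATE`:

```go
// Hedged sketch of a relocateRanges-style helper. predicate is a SQL fragment
// such as predAllButLiveness; from/to are node IDs. Assumes the file's
// existing imports (context, fmt, database/sql as gosql, roachtest's test).
func relocateRangesSketch(
	t test.Test, ctx context.Context, conn *gosql.DB, predicate string, from, to []int,
) {
	for i, source := range from {
		target := to[i%len(to)] // spread targets deterministically
		// ALTER RANGE RELOCATE accepts a subquery yielding range IDs; move
		// every range matching the predicate that has a replica on source.
		_, err := conn.ExecContext(ctx, fmt.Sprintf(
			`ALTER RANGE RELOCATE FROM %d TO %d FOR
				SELECT range_id FROM [SHOW CLUSTER RANGES WITH TABLES, DETAILS]
				WHERE (%s) AND %d = ANY(replicas)`,
			source, target, predicate, source))
		if err != nil {
			t.Fatal(err)
		}
	}
}
```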
@@ -781,8 +788,8 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster
 		// them to where they should be.
 		relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3, 4}, []int{5, 6, 7})
 		relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{node}, []int{1, 2, 3})
-		relocateRanges(t, ctx, conn, `range_id = 2`, []int{5, 6, 7}, []int{1, 2, 3, 4})
-		relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+		relocateRanges(t, ctx, conn, predOnlyLiveness, []int{5, 6, 7}, []int{1, 2, 3, 4})
+		relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 
 		// Randomly sleep up to the lease renewal interval, to vary the time
 		// between the last lease renewal and the failure.
@@ -976,10 +983,10 @@ func runFailoverLiveness(
 	// do it ourselves. Precreating the database/range and moving it to the
 	// correct nodes first is not sufficient, since workload will spread the
 	// ranges across all nodes regardless.
-	relocateRanges(t, ctx, conn, `range_id != 2`, []int{4}, []int{1, 2, 3})
+	relocateRanges(t, ctx, conn, predAllButLiveness, []int{4}, []int{1, 2, 3})
 
-	// We also make sure the lease is located on n4.
-	relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+	// We also make sure the liveness lease is located on n4.
+	relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 
 	// Run workload on n5 via n1-n3 gateways until test ends (context cancels).
 	t.L().Printf("running workload")
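`relocateLeases` (also predefined in this file and unchanged by the diff) takes the same kind of predicate plus a single target node. A comparable sketch, assuming `ALTER RANGE RELOCATE LEASE` and the `lease_holder` column that `WITH DETAILS` adds; again illustrative, not the file's actual helper:

```go
// Hedged sketch of a relocateLeases-style helper: move the lease of every
// range matching the predicate to node `to`, skipping ranges already there.
func relocateLeasesSketch(
	t test.Test, ctx context.Context, conn *gosql.DB, predicate string, to int,
) {
	_, err := conn.ExecContext(ctx, fmt.Sprintf(
		`ALTER RANGE RELOCATE LEASE TO %d FOR
			SELECT range_id FROM [SHOW CLUSTER RANGES WITH TABLES, DETAILS]
			WHERE (%s) AND lease_holder != %d`,
		to, predicate, to))
	if err != nil {
		t.Fatal(err)
	}
}
```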
@@ -1002,8 +1009,8 @@ func runFailoverLiveness(
 
 		// Ranges and leases may occasionally escape their constraints. Move them
 		// to where they should be.
-		relocateRanges(t, ctx, conn, `range_id != 2`, []int{4}, []int{1, 2, 3})
-		relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+		relocateRanges(t, ctx, conn, predAllButLiveness, []int{4}, []int{1, 2, 3})
+		relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 
 		// Randomly sleep up to the lease renewal interval, to vary the time
 		// between the last lease renewal and the failure.
@@ -1018,7 +1025,7 @@ func runFailoverLiveness(
 
 		t.L().Printf("recovering n%d (%s)", 4, failer)
 		failer.Recover(ctx, 4)
-		relocateLeases(t, ctx, conn, `range_id = 2`, 4)
+		relocateLeases(t, ctx, conn, predOnlyLiveness, 4)
 	}
 
 	sleepFor(ctx, t, time.Minute) // let cluster recover
@@ -1089,9 +1096,9 @@ func runFailoverSystemNonLiveness(
 	// n1-n3, so we do it ourselves. Precreating the database/range and moving it
 	// to the correct nodes first is not sufficient, since workload will spread
 	// the ranges across all nodes regardless.
-	relocateRanges(t, ctx, conn, `database_name = 'kv' OR range_id = 2`,
+	relocateRanges(t, ctx, conn, `database_name = 'kv' OR ` + predOnlyLiveness,
 		[]int{4, 5, 6}, []int{1, 2, 3})
-	relocateRanges(t, ctx, conn, `database_name != 'kv' AND range_id != 2`,
+	relocateRanges(t, ctx, conn, `database_name != 'kv' AND ` + predAllButLiveness,
 		[]int{1, 2, 3}, []int{4, 5, 6})
 
 	// Run workload on n7 via n1-n3 as gateways until test ends (context cancels).
@@ -1116,9 +1123,9 @@ func runFailoverSystemNonLiveness(
 
 		// Ranges may occasionally escape their constraints. Move them
 		// to where they should be.
-		relocateRanges(t, ctx, conn, `database_name != 'kv' AND range_id != 2`,
+		relocateRanges(t, ctx, conn, `database_name != 'kv' AND ` + predAllButLiveness,
 			[]int{1, 2, 3}, []int{4, 5, 6})
-		relocateRanges(t, ctx, conn, `database_name = 'kv' OR range_id = 2`,
+		relocateRanges(t, ctx, conn, `database_name = 'kv' OR ` + predOnlyLiveness,
 			[]int{4, 5, 6}, []int{1, 2, 3})
 
 		// Randomly sleep up to the lease renewal interval, to vary the time
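One subtlety in the two hunks above: the constants are raw SQL fragments, so they compose with other conditions by plain string concatenation, and these call sites lean on SQL operator precedence (`AND` binds tighter than `OR`). Expanded, the composed predicates read:

```go
// Illustrative expansion of the composed predicates at these call sites:
_ = `database_name = 'kv' OR start_key = '/System/NodeLiveness'`
_ = `database_name != 'kv' AND start_key != '/System/NodeLiveness'`
```

If the fragments were ever combined with more complex conditions, pre-parenthesized constants (or parenthesizing at the call site) would be the safer pattern.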