@@ -48,6 +48,9 @@ const (
4848 meanCPUTolerance = 0.15
4949 // statSamplePeriod is the period at which timeseries stats are sampled.
5050 statSamplePeriod = 10 * time .Second
51+ // stableDuration is the duration which the cluster's load must remain
52+ // balanced for to pass.
53+ stableDuration = time .Minute
5154)
5255
5356func registerRebalanceLoad (r registry.Registry ) {
@@ -150,7 +153,8 @@ func registerRebalanceLoad(r registry.Registry) {
150153 }
151154
152155 var reason string
153- var done bool
156+ var balancedStartTime time.Time
157+ var prevIsBalanced bool
154158 for tBegin := timeutil .Now (); timeutil .Since (tBegin ) <= maxDuration ; {
155159 // Wait out the sample period initially to allow the timeseries to
156160 // populate meaningful information for the test to query.
@@ -160,14 +164,20 @@ func registerRebalanceLoad(r registry.Registry) {
160164 case <- time .After (statSamplePeriod ):
161165 }
162166
167+ now := timeutil .Now ()
163168 clusterStoresCPU , err := storeCPUFn (ctx )
164169 if err != nil {
165170 t .L ().Printf ("unable to get the cluster stores CPU %s\n " , err .Error ())
171+ continue
166172 }
167-
168- done , reason = isLoadEvenlyDistributed (clusterStoresCPU , meanCPUTolerance )
173+ var curIsBalanced bool
174+ curIsBalanced , reason = isLoadEvenlyDistributed (clusterStoresCPU , meanCPUTolerance )
169175 t .L ().Printf ("cpu %s" , reason )
170- if done {
176+ if ! prevIsBalanced && curIsBalanced {
177+ balancedStartTime = now
178+ }
179+ prevIsBalanced = curIsBalanced
180+ if prevIsBalanced && now .Sub (balancedStartTime ) > stableDuration {
171181 t .Status ("successfully achieved CPU balance; waiting for kv to finish running" )
172182 cancel ()
173183 return nil
@@ -194,7 +204,7 @@ func registerRebalanceLoad(r registry.Registry) {
194204 concurrency = 32
195205 fmt .Printf ("lowering concurrency to %d in local testing\n " , concurrency )
196206 }
197- rebalanceLoadRun (ctx , t , c , "leases" , 5 * time .Minute , concurrency , false /* mixedVersion */ )
207+ rebalanceLoadRun (ctx , t , c , "leases" , 10 * time .Minute , concurrency , false /* mixedVersion */ )
198208 },
199209 },
200210 )
@@ -208,7 +218,7 @@ func registerRebalanceLoad(r registry.Registry) {
208218 concurrency = 32
209219 fmt .Printf ("lowering concurrency to %d in local testing\n " , concurrency )
210220 }
211- rebalanceLoadRun (ctx , t , c , "leases" , 5 * time .Minute , concurrency , true /* mixedVersion */ )
221+ rebalanceLoadRun (ctx , t , c , "leases" , 10 * time .Minute , concurrency , true /* mixedVersion */ )
212222 },
213223 },
214224 )
@@ -224,7 +234,7 @@ func registerRebalanceLoad(r registry.Registry) {
224234 fmt .Printf ("lowering concurrency to %d in local testing\n " , concurrency )
225235 }
226236 rebalanceLoadRun (
227- ctx , t , c , "leases and replicas" , 5 * time .Minute , concurrency , false , /* mixedVersion */
237+ ctx , t , c , "leases and replicas" , 10 * time .Minute , concurrency , false , /* mixedVersion */
228238 )
229239 },
230240 },
@@ -240,7 +250,7 @@ func registerRebalanceLoad(r registry.Registry) {
240250 fmt .Printf ("lowering concurrency to %d in local testing\n " , concurrency )
241251 }
242252 rebalanceLoadRun (
243- ctx , t , c , "leases and replicas" , 5 * time .Minute , concurrency , true , /* mixedVersion */
253+ ctx , t , c , "leases and replicas" , 10 * time .Minute , concurrency , true , /* mixedVersion */
244254 )
245255 },
246256 },
0 commit comments