77 "time"
88
99 "github.com/pkg/errors"
10+ "github.com/prometheus/client_golang/prometheus"
11+ "github.com/prometheus/prometheus/pkg/labels"
1012 "github.com/prometheus/prometheus/pkg/timestamp"
1113 "github.com/stretchr/testify/assert"
1214 "github.com/stretchr/testify/require"
@@ -17,6 +19,7 @@ import (
1719 "github.com/cortexproject/cortex/pkg/ring"
1820 "github.com/cortexproject/cortex/pkg/ring/kv"
1921 "github.com/cortexproject/cortex/pkg/ring/kv/consul"
22+ "github.com/cortexproject/cortex/pkg/util"
2023 "github.com/cortexproject/cortex/pkg/util/flagext"
2124 "github.com/cortexproject/cortex/pkg/util/services"
2225 "github.com/cortexproject/cortex/pkg/util/test"
@@ -196,13 +199,14 @@ func TestCheckReplicaMultiCluster(t *testing.T) {
196199 replica1 := "replica1"
197200 replica2 := "replica2"
198201
202+ reg := prometheus .NewPedanticRegistry ()
199203 c , err := newClusterTracker (HATrackerConfig {
200204 EnableHATracker : true ,
201205 KVStore : kv.Config {Store : "inmemory" },
202206 UpdateTimeout : 100 * time .Millisecond ,
203207 UpdateTimeoutJitterMax : 0 ,
204208 FailoverTimeout : time .Second ,
205- }, trackerLimits {maxClusters : 100 }, nil )
209+ }, trackerLimits {maxClusters : 100 }, reg )
206210 require .NoError (t , err )
207211 require .NoError (t , services .StartAndAwaitRunning (context .Background (), c ))
208212 defer services .StopAndAwaitTerminated (context .Background (), c ) //nolint:errcheck
@@ -224,20 +228,34 @@ func TestCheckReplicaMultiCluster(t *testing.T) {
224228 assert .NoError (t , err )
225229 err = c .checkReplica (context .Background (), "user" , "c2" , replica1 )
226230 assert .NoError (t , err )
231+
232+ // We expect no failed (5xx) CAS operations and at least one successful (2xx) CAS operation.
233+ metrics , err := reg .Gather ()
234+ require .NoError (t , err )
235+
236+ assert .Equal (t , uint64 (0 ), util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
237+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
238+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "5.*" ),
239+ }))
240+ assert .Greater (t , util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
241+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
242+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "2.*" ),
243+ }), uint64 (0 ))
227244}
228245
229246func TestCheckReplicaMultiClusterTimeout (t * testing.T ) {
230247 start := mtime .Now ()
231248 replica1 := "replica1"
232249 replica2 := "replica2"
233250
251+ reg := prometheus .NewPedanticRegistry ()
234252 c , err := newClusterTracker (HATrackerConfig {
235253 EnableHATracker : true ,
236254 KVStore : kv.Config {Store : "inmemory" },
237255 UpdateTimeout : 100 * time .Millisecond ,
238256 UpdateTimeoutJitterMax : 0 ,
239257 FailoverTimeout : time .Second ,
240- }, trackerLimits {maxClusters : 100 }, nil )
258+ }, trackerLimits {maxClusters : 100 }, reg )
241259 require .NoError (t , err )
242260 require .NoError (t , services .StartAndAwaitRunning (context .Background (), c ))
243261 defer services .StopAndAwaitTerminated (context .Background (), c ) //nolint:errcheck
@@ -259,7 +277,13 @@ func TestCheckReplicaMultiClusterTimeout(t *testing.T) {
259277 err = c .checkReplica (context .Background (), "user" , "c2" , replica1 )
260278 assert .NoError (t , err )
261279
262- // Wait more than the timeout.
280+ // Reject samples from replica 2 in each cluster.
281+ err = c .checkReplica (context .Background (), "user" , "c1" , replica2 )
282+ assert .Error (t , err )
283+ err = c .checkReplica (context .Background (), "user" , "c2" , replica2 )
284+ assert .Error (t , err )
285+
286+ // Wait more than the failover timeout.
263287 mtime .NowForce (start .Add (1100 * time .Millisecond ))
264288
265289 // Accept a sample from c1/replica2.
@@ -271,6 +295,19 @@ func TestCheckReplicaMultiClusterTimeout(t *testing.T) {
271295 assert .Error (t , err )
272296 err = c .checkReplica (context .Background (), "user" , "c2" , replica1 )
273297 assert .NoError (t , err )
298+
299+ // We expect no failed (5xx) CAS operations and at least one successful (2xx) CAS operation.
300+ metrics , err := reg .Gather ()
301+ require .NoError (t , err )
302+
303+ assert .Equal (t , uint64 (0 ), util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
304+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
305+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "5.*" ),
306+ }))
307+ assert .Greater (t , util .GetSumOfHistogramSampleCount (metrics , "cortex_kv_request_duration_seconds" , labels.Selector {
308+ labels .MustNewMatcher (labels .MatchEqual , "operation" , "CAS" ),
309+ labels .MustNewMatcher (labels .MatchRegexp , "status_code" , "2.*" ),
310+ }), uint64 (0 ))
274311}
275312
276313// Test that writes only happen every update timeout.
0 commit comments