@@ -313,5 +313,113 @@ var _ = Describe("Operator HA tests", Label("e2e", "pr"), func() {
313
313
factory .DeleteDataLoader (fdbCluster .GetPrimary ())
314
314
})
315
315
})
316
+
317
+ PWhen ("when a remote side has network latency issues and a pod gets replaced" , func () {
318
+ /*
319
+
320
+ TODO (johscheuer): This test should be running with a bigger multi-region cluster e.g.:
321
+
322
+ config := fixtures.DefaultClusterConfigWithHaMode(fixtures.HaFourZoneSingleSat, false)
323
+ config.StorageServerPerPod = 8
324
+ config.MachineCount = 10
325
+ config.DisksPerMachine = 8
326
+
327
+ */
328
+ var experiment * fixtures.ChaosMeshExperiment
329
+
330
+ BeforeEach (func () {
331
+ dcID := fdbCluster .GetRemote ().GetCluster ().Spec .DataCenter
332
+
333
+ status := fdbCluster .GetPrimary ().GetStatus ()
334
+
335
+ var processGroupID fdbv1beta2.ProcessGroupID
336
+ for _ , process := range status .Cluster .Processes {
337
+ dc , ok := process .Locality [fdbv1beta2 .FDBLocalityDCIDKey ]
338
+ if ! ok || dc != dcID {
339
+ continue
340
+ }
341
+
342
+ var isLog bool
343
+ for _ , role := range process .Roles {
344
+ if role .Role == "log" {
345
+ isLog = true
346
+ break
347
+ }
348
+ }
349
+
350
+ if ! isLog {
351
+ continue
352
+ }
353
+
354
+ processGroupID = fdbv1beta2 .ProcessGroupID (process .Locality [fdbv1beta2 .FDBLocalityInstanceIDKey ])
355
+ break
356
+ }
357
+
358
+ log .Println ("Will inject chaos into" , processGroupID , "and replace it" )
359
+ var replacedPod corev1.Pod
360
+ for _ , pod := range fdbCluster .GetRemote ().GetLogPods ().Items {
361
+ if fixtures .GetProcessGroupID (pod ) != processGroupID {
362
+ continue
363
+ }
364
+
365
+ replacedPod = pod
366
+ break
367
+ }
368
+
369
+ log .Println ("Inject latency chaos" )
370
+ experiment = factory .InjectNetworkLatency (
371
+ chaosmesh.PodSelectorSpec {
372
+ GenericSelectorSpec : chaosmesh.GenericSelectorSpec {
373
+ Namespaces : []string {fdbCluster .GetRemote ().Namespace ()},
374
+ LabelSelectors : fdbCluster .GetRemote ().GetCachedCluster ().GetMatchLabels (),
375
+ },
376
+ },
377
+ chaosmesh.PodSelectorSpec {
378
+ GenericSelectorSpec : chaosmesh.GenericSelectorSpec {
379
+ Namespaces : []string {
380
+ fdbCluster .GetPrimary ().Namespace (),
381
+ fdbCluster .GetPrimarySatellite ().Namespace (),
382
+ fdbCluster .GetRemote ().Namespace (),
383
+ fdbCluster .GetRemoteSatellite ().Namespace (),
384
+ },
385
+ ExpressionSelectors : []metav1.LabelSelectorRequirement {
386
+ {
387
+ Key : fdbv1beta2 .FDBClusterLabel ,
388
+ Operator : metav1 .LabelSelectorOpExists ,
389
+ },
390
+ },
391
+ },
392
+ }, chaosmesh .Both ,
393
+ & chaosmesh.DelaySpec {
394
+ Latency : "250ms" ,
395
+ Correlation : "100" ,
396
+ Jitter : "0" ,
397
+ })
398
+
399
+ // TODO (johscheuer): Allow to have this as a long running task until the test is done.
400
+ factory .CreateDataLoaderIfAbsentWithWait (fdbCluster .GetPrimary (), false )
401
+
402
+ time .Sleep (1 * time .Minute )
403
+ log .Println ("replacedPod" , replacedPod .Name , "useLocalitiesForExclusion" , fdbCluster .GetPrimary ().GetCluster ().UseLocalitiesForExclusion ())
404
+ fdbCluster .GetRemote ().ReplacePod (replacedPod , true )
405
+ })
406
+
407
+ It ("should exclude and remove the pod" , func () {
408
+ Eventually (func () []fdbv1beta2.ExcludedServers {
409
+ status := fdbCluster .GetPrimary ().GetStatus ()
410
+ excludedServers := status .Cluster .DatabaseConfiguration .ExcludedServers
411
+ log .Println ("excludedServers" , excludedServers )
412
+ return excludedServers
413
+ }).WithTimeout (15 * time .Minute ).WithPolling (1 * time .Second ).Should (BeEmpty ())
414
+ })
415
+
416
+ AfterEach (func () {
417
+ Expect (fdbCluster .GetRemote ().ClearProcessGroupsToRemove ()).NotTo (HaveOccurred ())
418
+ factory .DeleteChaosMeshExperimentSafe (experiment )
419
+ // Making sure we included back all the process groups after exclusion is complete.
420
+ Expect (fdbCluster .GetPrimary ().GetStatus ().Cluster .DatabaseConfiguration .ExcludedServers ).To (BeEmpty ())
421
+ factory .DeleteDataLoader (fdbCluster .GetPrimary ())
422
+ })
423
+ })
316
424
})
317
425
})
0 commit comments