@@ -505,5 +505,81 @@ var _ = Describe("Configuration Anomaly Detection", Ordered, func() {
505
505
ginkgo .GinkgoWriter .Println ("Step 7: Test completed: Node NotReady condition simulated and checked." )
506
506
}
507
507
})
508
+
509
+ It ("AWS CCS: clustermonitoringerrorbudgetburn" , func (ctx context.Context ) {
510
+ if provider == "aws" {
511
+ const (
512
+ namespace = "openshift-user-workload-monitoring"
513
+ configMapName = "user-workload-monitoring-config"
514
+ )
515
+
516
+ fmt .Println ("Step 0: Fetching cluster info" )
517
+ response , err := ocme2eCli .ClustersMgmt ().V1 ().Clusters ().Cluster (clusterID ).Get ().Send ()
518
+ Expect (err ).ToNot (HaveOccurred (), "Failed to get cluster from OCM" )
519
+ cluster := response .Body ()
520
+ Expect (cluster ).ToNot (BeNil (), "Cluster response is nil" )
521
+
522
+ fmt .Println ("Step 1: Getting service logs before misconfiguration" )
523
+ logs , err := utils .GetServiceLogs (ocmCli , cluster )
524
+ Expect (err ).ToNot (HaveOccurred (), "Failed to fetch service logs before misconfig" )
525
+ logsBefore := logs .Items ().Slice ()
526
+
527
+ fmt .Println ("Step 2: Backing up current ConfigMap" )
528
+ originalCM := & corev1.ConfigMap {}
529
+ err = k8s .Get (ctx , configMapName , namespace , originalCM )
530
+ Expect (err ).ToNot (HaveOccurred (), "Failed to fetch original ConfigMap" )
531
+
532
+ backupCM := & corev1.ConfigMap {}
533
+ err = k8s .Get (ctx , configMapName , namespace , backupCM )
534
+ Expect (err ).ToNot (HaveOccurred (), "Failed to backup original ConfigMap" )
535
+
536
+ defer func () {
537
+ fmt .Println ("Restore: Restore backup configmap" )
538
+ err = k8s .Update (ctx , backupCM )
539
+ Expect (err ).ToNot (HaveOccurred (), "Restore the backup ConfigMap" )
540
+
541
+ fmt .Println ("Restore: Get restore backup configmap" )
542
+ restoreBackupCM := & corev1.ConfigMap {}
543
+ err = k8s .Get (ctx , configMapName , namespace , restoreBackupCM )
544
+ Expect (err ).ToNot (HaveOccurred (), "Failed to backup original ConfigMap" )
545
+
546
+ fmt .Println ("Restore: Comparing backup and restored ConfigMaps" )
547
+ Expect (restoreBackupCM .Data ).To (Equal (backupCM .Data ), "Restored ConfigMap data does not match the backup" )
548
+ Expect (restoreBackupCM .BinaryData ).To (Equal (backupCM .BinaryData ), "Restored ConfigMap binary data does not match the backup" )
549
+ }()
550
+
551
+ fmt .Println ("Step 3: Injecting invalid config to simulate misconfiguration" )
552
+ err = retry .RetryOnConflict (retry .DefaultRetry , func () error {
553
+ err := k8s .Get (ctx , configMapName , namespace , originalCM )
554
+ if err != nil {
555
+ return err
556
+ }
557
+ if originalCM .Data == nil {
558
+ originalCM .Data = make (map [string ]string )
559
+ }
560
+ originalCM .Data ["user-workload-monitoring.yaml" ] = `
561
+ prometheus:
562
+ retention: 24h
563
+ # broken: : invalid_yaml
564
+ // `
565
+
566
+ return k8s .Update (ctx , originalCM )
567
+ })
568
+ Expect (err ).ToNot (HaveOccurred (), "Failed to apply invalid config" )
569
+
570
+ fmt .Println ("Step 4 : Waiting to pagerduty alert..." )
571
+ _ , err = testPdClient .TriggerIncident ("ClusterMonitoringErrorBudgetBurnSRE" , clusterID )
572
+ Expect (err ).NotTo (HaveOccurred (), "Failed to trigger silent PagerDuty alert" )
573
+
574
+ time .Sleep (2 * time .Minute )
575
+
576
+ fmt .Println ("Step 5: Fetching service logs after misconfiguration" )
577
+ logs , err = utils .GetServiceLogs (ocmCli , cluster )
578
+ Expect (err ).ToNot (HaveOccurred (), "Failed to get service logs" )
579
+ logsAfter := logs .Items ().Slice ()
580
+
581
+ Expect (logsAfter ).To (HaveLen (len (logsBefore )), "Service logs count changed after scale down/up" )
582
+ }
583
+ })
508
584
509
585
}, ginkgo .ContinueOnFailure )
0 commit comments