@@ -668,7 +668,7 @@ func TestPathologicalEventsTopologyAwareHintsDisabled(t *testing.T) {
668668 }
669669}
670670
671- func TestPathologicalEventsPrometheusReadinessProbeErrorsDuringUpgrades (t * testing.T ) {
671+ func TestPathologicalEventsPrometheusReadinessProbeErrors (t * testing.T ) {
672672 const namespace = "openshift-monitoring"
673673
674674 unhealthyReasonPathologicalMessageWithHumanMessage := func (humanMessage string , repetitionCount int ) monitorapi.Message {
@@ -682,90 +682,91 @@ func TestPathologicalEventsPrometheusReadinessProbeErrorsDuringUpgrades(t *testi
682682 }
683683 }
684684
685- openshiftMonitoringNsLocatorWithPodKey := func (pod string ) monitorapi.Locator {
685+ nsLocatorWithPodKey := func (pod , ns string ) monitorapi.Locator {
686686 return monitorapi.Locator {
687687 Type : monitorapi .LocatorTypePod ,
688688 Keys : map [monitorapi.LocatorKey ]string {
689- monitorapi .LocatorNamespaceKey : "openshift-monitoring" ,
689+ monitorapi .LocatorNamespaceKey : ns ,
690690 monitorapi .LocatorPodKey : pod ,
691691 },
692692 }
693693 }
694694
695695 tests := []struct {
696696 name string
697- intervals []monitorapi.Interval
698697 expectedMessage string
698+ pod string
699+ ns string
700+ humanMessage string
701+ repetitionCount int
699702 }{
700703 {
701- name : "Readiness probe error (stopping container) on first Prometheus pod" ,
702- intervals : []monitorapi.Interval {
703- {
704- Condition : monitorapi.Condition {
705- Locator : openshiftMonitoringNsLocatorWithPodKey ("prometheus-k8s-0" ),
706- Message : unhealthyReasonPathologicalMessageWithHumanMessage ("Readiness probe errored: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" , 100 ),
707- },
708- },
709- },
704+ name : "Readiness probe error (stopping container) on first Prometheus pod" ,
705+ expectedMessage : "" ,
706+ pod : "prometheus-k8s-0" ,
707+ ns : namespace ,
708+ humanMessage : "Readiness probe errored: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" ,
709+ repetitionCount : 100 ,
710710 },
711711 {
712- name : "Readiness probe error (terminated container) on second Prometheus pod" ,
713- intervals : []monitorapi.Interval {
714- {
715- Condition : monitorapi.Condition {
716- Locator : openshiftMonitoringNsLocatorWithPodKey ("prometheus-k8s-1" ),
717- Message : unhealthyReasonPathologicalMessageWithHumanMessage ("Readiness probe errored: rpc error: code = NotFound desc = container is not created or running: checking if PID of 58577e7deb7b8ae87b8029b9988fa268613748d0743ce989748f27e52b199ef5 is running failed: container process not found" , 100 ),
718- },
719- },
720- },
712+ name : "Readiness probe error (terminated container) on second Prometheus pod" ,
713+ expectedMessage : "" ,
714+ pod : "prometheus-k8s-1" ,
715+ ns : namespace ,
716+ humanMessage : "Readiness probe errored: rpc error: code = NotFound desc = container is not created or running: checking if PID of 58577e7deb7b8ae87b8029b9988fa268613748d0743ce989748f27e52b199ef5 is running failed: container process not found" ,
717+ repetitionCount : 100 ,
721718 },
722719 {
723- name : "Readiness probe error (stopping container, different human message) on second Prometheus pod" ,
724- intervals : []monitorapi.Interval {
725- {
726- Condition : monitorapi.Condition {
727- Locator : openshiftMonitoringNsLocatorWithPodKey ("prometheus-k8s-1" ),
728- Message : unhealthyReasonPathologicalMessageWithHumanMessage ("Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" , 100 ),
729- },
730- },
731- },
720+ name : "Readiness probe error (stopping container, different human message) on second Prometheus pod" ,
721+ expectedMessage : "" ,
722+ pod : "prometheus-k8s-1" ,
723+ ns : namespace ,
724+ humanMessage : "Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" ,
725+ repetitionCount : 100 ,
732726 },
733727 {
734- name : "Readiness probe error (stopping container, different human message ) on non-existent Prometheus pod should not be ignored" ,
735- intervals : []monitorapi. Interval {
736- {
737- Condition : monitorapi. Condition {
738- Locator : openshiftMonitoringNsLocatorWithPodKey ( "prometheus-k8s-2" ) ,
739- Message : unhealthyReasonPathologicalMessageWithHumanMessage ( "Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" , 100 ) ,
740- },
741- },
742- } ,
728+ name : "Readiness probe error (stopping container) on a Prometheus pod in a different namespace should not be ignored" ,
729+ expectedMessage : "1 events happened too frequently \n \n event happened 100 times, something is wrong: namespace/foo pod/prometheus-k8s-1 - reason/Unhealthy Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1 (00:00:00Z) result=reject " ,
730+ pod : "prometheus-k8s-1" ,
731+ ns : "foo" ,
732+ humanMessage : "Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" ,
733+ repetitionCount : 100 ,
734+ },
735+ {
736+ name : "Readiness probe error (stopping container) on non-existent Prometheus pod should not be ignored" ,
743737 expectedMessage : "1 events happened too frequently\n \n event happened 100 times, something is wrong: namespace/openshift-monitoring pod/prometheus-k8s-2 - reason/Unhealthy Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1 (00:00:00Z) result=reject " ,
738+ pod : "prometheus-k8s-2" ,
739+ ns : namespace ,
740+ humanMessage : "Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" ,
741+ repetitionCount : 100 ,
744742 },
745743 {
746- name : "Readiness probe error (stopping container, different human message) on a Prometheus pod should not be ignored above the acceptable limit" ,
747- intervals : []monitorapi.Interval {
748- {
749- Condition : monitorapi.Condition {
750- Locator : openshiftMonitoringNsLocatorWithPodKey ("prometheus-k8s-1" ),
751- Message : unhealthyReasonPathologicalMessageWithHumanMessage ("Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" , 101 ),
752- },
753- },
754- },
744+ name : "Readiness probe error (stopping container, different human message) on a Prometheus pod should not be ignored above the acceptable limit" ,
755745 expectedMessage : "1 events happened too frequently\n \n event happened 101 times, something is wrong: namespace/openshift-monitoring pod/prometheus-k8s-1 - reason/Unhealthy Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1 (00:00:00Z) result=reject " ,
746+ pod : "prometheus-k8s-1" ,
747+ ns : namespace ,
748+ humanMessage : "Readiness probe errored and resulted in unknown state: rpc error: code = Unknown desc = command error: cannot register an exec PID: container is stopping, stdout: , stderr: , exit code -1" ,
749+ repetitionCount : 101 ,
756750 },
757751 }
758752
759753 for _ , test := range tests {
760754 t .Run (test .name , func (t * testing.T ) {
761- events := monitorapi .Intervals (test .intervals )
755+ events := monitorapi .Intervals ([]monitorapi.Interval {
756+ {
757+ Condition : monitorapi.Condition {
758+ Locator : nsLocatorWithPodKey (test .pod , test .ns ),
759+ Message : unhealthyReasonPathologicalMessageWithHumanMessage (test .humanMessage , test .repetitionCount ),
760+ },
761+ },
762+ })
762763 evaluator := duplicateEventsEvaluator {
763- registry : NewUpgradePathologicalEventMatchers (nil , events ),
764+ registry : NewUniversalPathologicalEventMatchers (nil , events ),
764765 }
765766
766767 testName := "events should not repeat"
767768 junits := evaluator .testDuplicatedEvents (testName , false , events , nil , false )
768- jUnitName := getJUnitName (testName , namespace )
769+ jUnitName := getJUnitName (testName , test . ns )
769770 for _ , junit := range junits {
770771 if junit .Name == jUnitName {
771772 if test .expectedMessage != "" {
0 commit comments