@@ -37,6 +37,9 @@ import (
3737 "github.com/GoogleContainerTools/config-sync/pkg/core/k8sobjects"
3838 "github.com/GoogleContainerTools/config-sync/pkg/kinds"
3939 "github.com/GoogleContainerTools/config-sync/pkg/metadata"
40+ rgmetrics "github.com/GoogleContainerTools/config-sync/pkg/resourcegroup/controllers/metrics"
41+ prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
42+ prometheusmodel "github.com/prometheus/common/model"
4043 appsv1 "k8s.io/api/apps/v1"
4144 corev1 "k8s.io/api/core/v1"
4245 apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -757,6 +760,261 @@ func deleteObject(nt *nomostest.NT, obj client.Object) error {
757760 return nil
758761}
759762
763+ // TestReconcilerFinalizer_ResourceGroupMetricsReset tests that when a ResourceGroup
764+ // is deleted, all resource-related metrics are reset to 0.
765+ func TestReconcilerFinalizer_ResourceGroupMetricsReset (t * testing.T ) {
766+ nt := nomostest .New (t , nomostesting .MultiRepos )
767+ rootSyncID := nomostest .DefaultRootSyncID
768+ rootSyncKey := rootSyncID .ObjectKey
769+ rootSyncGitRepo := nt .SyncSourceGitReadWriteRepository (rootSyncID )
770+
771+ deployment1NN := types.NamespacedName {Name : "helloworld-1" , Namespace : testNs }
772+ namespace1NN := types.NamespacedName {Name : testNs }
773+ safetyNamespace1NN := types.NamespacedName {Name : rootSyncGitRepo .SafetyNSName }
774+
775+ nt .T .Cleanup (func () {
776+ cleanupSingleLevel (nt ,
777+ rootSyncKey ,
778+ deployment1NN ,
779+ namespace1NN , safetyNamespace1NN )
780+ })
781+
782+ // Add namespace to RootSync
783+ namespace1 := k8sobjects .NamespaceObject (namespace1NN .Name )
784+ nt .Must (rootSyncGitRepo .Add (nomostest .StructuredNSPath (namespace1NN .Name , namespace1NN .Name ), namespace1 ))
785+
786+ // Add deployment-helloworld-1 to RootSync
787+ deployment1Path := nomostest .StructuredNSPath (deployment1NN .Namespace , "deployment-helloworld-1" )
788+ deployment1 := loadDeployment (nt , "../testdata/deployment-helloworld.yaml" )
789+ deployment1 .SetName (deployment1NN .Name )
790+ deployment1 .SetNamespace (deployment1NN .Namespace )
791+ nt .Must (rootSyncGitRepo .Add (deployment1Path , deployment1 ))
792+ nt .Must (rootSyncGitRepo .CommitAndPush ("Adding deployment helloworld-1 to RootSync" ))
793+ nt .Must (nt .WatchForAllSyncs ())
794+ nt .Must (nt .Watcher .WatchForCurrentStatus (kinds .Deployment (), deployment1 .Name , deployment1 .Namespace ))
795+
796+ // Wait for ResourceGroup to be reconciled and metrics to be recorded
797+ rgNN := types .NamespacedName (rootSyncKey )
798+ nt .Must (nt .Watcher .WatchObject (kinds .ResourceGroup (), rgNN .Name , rgNN .Namespace ,
799+ testwatcher .WatchPredicates (
800+ testpredicates .StatusEquals (nt .Scheme , kstatus .CurrentStatus ),
801+ )))
802+
803+ // Verify metrics are non-zero before deletion
804+ nt .T .Log ("Verifying resourcegroup metrics are non-zero before deletion" )
805+ nt .Must (validateResourceGroupMetricsNonZero (nt , rgNN ))
806+
807+ // Tail reconciler logs and print if there's an error.
808+ ctx , cancel := context .WithCancel (context .Background ())
809+ defer cancel ()
810+ go nomostest .TailReconcilerLogs (ctx , nt , nomostest .RootReconcilerObjectKey (rootSyncKey .Name ))
811+
812+ // Enable deletion propagation
813+ nt .T .Log ("Enabling RootSync deletion propagation" )
814+ rootSync := k8sobjects .RootSyncObjectV1Beta1 (rootSyncKey .Name )
815+ err := nt .KubeClient .Get (rootSync .Name , rootSync .Namespace , rootSync )
816+ if err != nil {
817+ nt .T .Fatal (err )
818+ }
819+ if metadata .SetDeletionPropagationPolicy (rootSync , metadata .DeletionPropagationPolicyForeground ) {
820+ err = nt .KubeClient .Update (rootSync )
821+ if err != nil {
822+ nt .T .Fatal (err )
823+ }
824+ }
825+ nt .Must (nt .Watcher .WatchObject (kinds .RootSyncV1Beta1 (), rootSync .GetName (), rootSync .GetNamespace (),
826+ testwatcher .WatchPredicates (
827+ testpredicates .StatusEquals (nt .Scheme , kstatus .CurrentStatus ),
828+ testpredicates .HasFinalizer (metadata .ReconcilerFinalizer ),
829+ )))
830+
831+ // Delete the RootSync
832+ nt .T .Log ("Deleting RootSync" )
833+ err = nt .KubeClient .Delete (rootSync )
834+ if err != nil {
835+ nt .T .Fatal (err )
836+ }
837+
838+ // Wait for ResourceGroup to be deleted
839+ nt .Must (nt .Watcher .WatchForNotFound (kinds .ResourceGroup (), rgNN .Name , rgNN .Namespace ))
840+
841+ // Verify all resourcegroup metrics are reset to 0
842+ nt .T .Log ("Verifying resourcegroup metrics are reset to 0 after deletion" )
843+ nt .Must (validateResourceGroupMetricsReset (nt , rgNN ))
844+ }
845+
846+ // validateResourceGroupMetricsNonZero verifies that resourcegroup metrics are non-zero
847+ func validateResourceGroupMetricsNonZero (nt * nomostest.NT , rgNN types.NamespacedName ) error {
848+ return nomostest .ValidateMetrics (nt ,
849+ resourceGroupMetricHasValueAtLeast (nt , rgmetrics .ResourceCountName , rgNN , 1 ),
850+ resourceGroupMetricHasValueAtLeast (nt , rgmetrics .ReadyResourceCountName , rgNN , 1 ),
851+ resourceGroupMetricHasValueAtLeast (nt , rgmetrics .NamespaceCountName , rgNN , 1 ),
852+ )
853+ }
854+
855+ // validateResourceGroupMetricsReset verifies that all resourcegroup metrics are reset to 0
856+ func validateResourceGroupMetricsReset (nt * nomostest.NT , rgNN types.NamespacedName ) error {
857+ return nomostest .ValidateMetrics (nt ,
858+ resourceGroupMetricHasValue (nt , rgmetrics .ResourceCountName , rgNN , 0 ),
859+ resourceGroupMetricHasValue (nt , rgmetrics .ReadyResourceCountName , rgNN , 0 ),
860+ resourceGroupMetricHasValue (nt , rgmetrics .NamespaceCountName , rgNN , 0 ),
861+ resourceGroupMetricHasValue (nt , rgmetrics .ClusterScopedResourceCountName , rgNN , 0 ),
862+ resourceGroupMetricHasValue (nt , rgmetrics .CRDCountName , rgNN , 0 ),
863+ resourceGroupMetricHasValue (nt , rgmetrics .KCCResourceCountName , rgNN , 0 ),
864+ resourceGroupMetricHasValue (nt , rgmetrics .PipelineErrorName , rgNN , 0 ),
865+ )
866+ }
867+
// prometheusConfigSyncMetricPrefix is prepended to a resourcegroup metric
// name to form the full metric name used in Prometheus queries.
const prometheusConfigSyncMetricPrefix = "config_sync_"
871+
872+ // resourceGroupMetricHasValue returns a MetricsPredicate that validates a resourcegroup metric has the expected value.
873+ // If the expected value is zero, the metric must be zero or not found.
874+ func resourceGroupMetricHasValue (nt * nomostest.NT , metricName string , rgNN types.NamespacedName , value int64 ) nomostest.MetricsPredicate {
875+ return func (ctx context.Context , v1api prometheusv1.API ) error {
876+ fullMetricName := fmt .Sprintf ("%s%s" , prometheusConfigSyncMetricPrefix , metricName )
877+ labels := prometheusmodel.LabelSet {
878+ prometheusmodel .LabelName ("resourcegroup" ): prometheusmodel .LabelValue (rgNN .String ()),
879+ }
880+ query := fmt .Sprintf ("%s%s" , fullMetricName , labels )
881+ return validateResourceGroupMetricValue (ctx , nt , v1api , query , float64 (value ), value == 0 )
882+ }
883+ }
884+
885+ // resourceGroupMetricHasValueAtLeast returns a MetricsPredicate that validates a resourcegroup metric has at least the expected value.
886+ func resourceGroupMetricHasValueAtLeast (nt * nomostest.NT , metricName string , rgNN types.NamespacedName , value int64 ) nomostest.MetricsPredicate {
887+ return func (ctx context.Context , v1api prometheusv1.API ) error {
888+ fullMetricName := fmt .Sprintf ("%s%s" , prometheusConfigSyncMetricPrefix , metricName )
889+ labels := prometheusmodel.LabelSet {
890+ prometheusmodel .LabelName ("resourcegroup" ): prometheusmodel .LabelValue (rgNN .String ()),
891+ }
892+ query := fmt .Sprintf ("%s%s" , fullMetricName , labels )
893+ return validateResourceGroupMetricValueAtLeast (ctx , nt , v1api , query , float64 (value ))
894+ }
895+ }
896+
897+ // validateResourceGroupMetricValue validates that a metric has the expected value.
898+ // If allowMissing is true and the metric doesn't exist, it's considered valid (for zero values).
899+ func validateResourceGroupMetricValue (ctx context.Context , nt * nomostest.NT , v1api prometheusv1.API , query string , value float64 , allowMissing bool ) error {
900+ response , err := metricQueryNow (ctx , nt , v1api , query )
901+ if err != nil {
902+ if allowMissing {
903+ return nil // Missing metric is acceptable when expecting zero
904+ }
905+ return err
906+ }
907+
908+ switch result := response .(type ) {
909+ case prometheusmodel.Vector :
910+ if len (result ) == 0 {
911+ if allowMissing {
912+ return nil // No results is acceptable when expecting zero
913+ }
914+ return fmt .Errorf ("no results from prometheus query: %s" , query )
915+ }
916+ nt .Logger .Debugf ("prometheus vector response:\n %s" , result )
917+ for _ , sample := range result {
918+ if sample .Value .Equal (prometheusmodel .SampleValue (value )) {
919+ return nil
920+ }
921+ }
922+ var values []prometheusmodel.SampleValue
923+ for _ , sample := range result {
924+ values = append (values , sample .Value )
925+ }
926+ return fmt .Errorf ("value %v not found in vector response %v for query: %s" , value , values , query )
927+ case prometheusmodel.Matrix :
928+ if len (result ) == 0 {
929+ if allowMissing {
930+ return nil // No results is acceptable when expecting zero
931+ }
932+ return fmt .Errorf ("no results from prometheus query: %s" , query )
933+ }
934+ nt .Logger .Debugf ("prometheus matrix response:\n %s" , result )
935+ for _ , samples := range result {
936+ for _ , sample := range samples .Values {
937+ if sample .Value .Equal (prometheusmodel .SampleValue (value )) {
938+ return nil
939+ }
940+ }
941+ }
942+ var values []prometheusmodel.SampleValue
943+ for _ , samples := range result {
944+ for _ , sample := range samples .Values {
945+ values = append (values , sample .Value )
946+ }
947+ }
948+ return fmt .Errorf ("value %v not found in matrix response %v for query: %s" , value , values , query )
949+ default :
950+ return fmt .Errorf ("unsupported prometheus response: %T" , response )
951+ }
952+ }
953+
954+ // validateResourceGroupMetricValueAtLeast validates that a metric has at least the expected value.
955+ func validateResourceGroupMetricValueAtLeast (ctx context.Context , nt * nomostest.NT , v1api prometheusv1.API , query string , value float64 ) error {
956+ response , err := metricQueryNow (ctx , nt , v1api , query )
957+ if err != nil {
958+ return err
959+ }
960+
961+ switch result := response .(type ) {
962+ case prometheusmodel.Vector :
963+ if len (result ) == 0 {
964+ return fmt .Errorf ("no results from prometheus query: %s" , query )
965+ }
966+ nt .Logger .Debugf ("prometheus vector response:\n %s" , result )
967+ for _ , sample := range result {
968+ if sample .Value >= prometheusmodel .SampleValue (value ) {
969+ return nil
970+ }
971+ }
972+ var values []prometheusmodel.SampleValue
973+ for _ , sample := range result {
974+ values = append (values , sample .Value )
975+ }
976+ return fmt .Errorf ("value %v not found in vector response %v for query: %s" , value , values , query )
977+ case prometheusmodel.Matrix :
978+ if len (result ) == 0 {
979+ return fmt .Errorf ("no results from prometheus query: %s" , query )
980+ }
981+ nt .Logger .Debugf ("prometheus matrix response:\n %s" , result )
982+ for _ , samples := range result {
983+ for _ , sample := range samples .Values {
984+ if sample .Value >= prometheusmodel .SampleValue (value ) {
985+ return nil
986+ }
987+ }
988+ }
989+ var values []prometheusmodel.SampleValue
990+ for _ , samples := range result {
991+ for _ , sample := range samples .Values {
992+ values = append (values , sample .Value )
993+ }
994+ }
995+ return fmt .Errorf ("value %v not found in matrix response %v for query: %s" , value , values , query )
996+ default :
997+ return fmt .Errorf ("unsupported prometheus response: %T" , response )
998+ }
999+ }
1000+
1001+ // metricQueryNow performs the specified query with the default timeout.
1002+ func metricQueryNow (ctx context.Context , nt * nomostest.NT , v1api prometheusv1.API , query string ) (prometheusmodel.Value , error ) {
1003+ ctx , cancel := context .WithTimeout (ctx , 10 * time .Second )
1004+ defer cancel ()
1005+
1006+ nt .Logger .Debugf ("prometheus query: %s" , query )
1007+ response , warnings , err := v1api .Query (ctx , query , time .Now ())
1008+ if err != nil {
1009+ return nil , err
1010+ }
1011+ if len (warnings ) > 0 {
1012+ nt .T .Logf ("prometheus warnings: %v" , warnings )
1013+ }
1014+
1015+ return response , nil
1016+ }
1017+
7601018func loadDeployment (nt * nomostest.NT , path string ) * appsv1.Deployment {
7611019 specBytes , err := os .ReadFile (path )
7621020 if err != nil {
0 commit comments