Skip to content

Commit 8f38a69

Browse files
authored
Fix bug where syntax error on policy incremented wrong metric (#79)
Simple checks added to differentiate between system and user errors to reduce false positive system errors Ref: https://issues.redhat.com/browse/ACM-3160 Signed-off-by: Jeffrey Luo <[email protected]>
1 parent c7dfba5 commit 8f38a69

File tree

4 files changed

+162
-2
lines changed

4 files changed

+162
-2
lines changed

controllers/templatesync/template_sync.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,12 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
564564

565565
tLogger.Error(resultError, "Failed to create policy template")
566566

567-
policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "create-error").Inc()
567+
// check for syntax error in policy
568+
if k8serrors.IsInvalid(err) {
569+
policyUserErrorsCounter.WithLabelValues(instance.Name, tName, "format-error").Inc()
570+
} else {
571+
policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "create-error").Inc()
572+
}
568573

569574
continue
570575
}
@@ -757,7 +762,12 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
757762

758763
tLogger.Error(err, "Failed to update the policy template")
759764

760-
policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "patch-error").Inc()
765+
// check for syntax error in policy
766+
if k8serrors.IsInvalid(err) {
767+
policyUserErrorsCounter.WithLabelValues(instance.Name, tName, "format-error").Inc()
768+
} else {
769+
policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "patch-error").Inc()
770+
}
761771

762772
continue
763773
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright (c) 2020 Red Hat, Inc.
2+
// Copyright Contributors to the Open Cluster Management project
3+
4+
package e2e
5+
6+
import (
7+
"strconv"
8+
9+
. "github.com/onsi/ginkgo/v2"
10+
. "github.com/onsi/gomega"
11+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
12+
policyUtils "open-cluster-management.io/governance-policy-propagator/test/utils"
13+
14+
"open-cluster-management.io/governance-policy-framework-addon/test/utils"
15+
)
16+
17+
var _ = Describe("Test proper metrics handling on syntax error", Ordered, func() {
18+
userMetricName := "policy_user_errors_total"
19+
systemMetricName := "policy_system_errors_total"
20+
policyName := "case21-policy"
21+
errPolicyFile := "../resources/case21_syntax_error_metric/case21-test-policy-err.yaml"
22+
correctPolicyFile := "../resources/case21_syntax_error_metric/case21-test-policy.yaml"
23+
24+
cleanup := func() {
25+
By("Deleting a policy on hub cluster in ns:" + clusterNamespaceOnHub)
26+
// Clean up and ignore any errors (in case it was deleted previously)
27+
_, _ = kubectlHub("delete", "-f", errPolicyFile, "-n", clusterNamespaceOnHub, "--ignore-not-found")
28+
opt := metav1.ListOptions{}
29+
policyUtils.ListWithTimeout(clientHubDynamic, gvrPolicy, opt, 0, true, defaultTimeoutSeconds)
30+
policyUtils.ListWithTimeout(clientManagedDynamic, gvrPolicy, opt, 0, true, defaultTimeoutSeconds)
31+
}
32+
33+
AfterAll(cleanup)
34+
35+
It("Should increment user error metric when attempting to apply policy with syntax error", func() {
36+
hubApplyPolicy(policyName, errPolicyFile)
37+
38+
By("Checking for the " + userMetricName + " metric on the template-sync controller")
39+
values := []string{}
40+
Eventually(func() []string {
41+
values = utils.GetMetrics(userMetricName, policyName)
42+
43+
return values
44+
}, defaultTimeoutSeconds, 1).Should(HaveLen(1))
45+
Expect(strconv.Atoi(values[0])).To(BeNumerically(">=", 1))
46+
})
47+
48+
It("Should not increment system error metric", func() {
49+
hubApplyPolicy(policyName, errPolicyFile)
50+
51+
By("Checking for the " + systemMetricName + " metric on the template-sync controller")
52+
values := []string{}
53+
Eventually(func() []string {
54+
values = utils.GetMetrics(systemMetricName, policyName)
55+
56+
return values
57+
}, defaultTimeoutSeconds, 1).Should(HaveLen(0))
58+
})
59+
60+
It("Should clean up user error metric on policy deletion", func() {
61+
By(
62+
"Deleting policy " + policyName + " on hub cluster " +
63+
"in ns:" + clusterNamespaceOnHub,
64+
)
65+
cleanup()
66+
By("Checking for the " + userMetricName + " metric on the template-sync controller")
67+
values := []string{}
68+
Eventually(func() []string {
69+
values = utils.GetMetrics(userMetricName, policyName)
70+
71+
return values
72+
}, defaultTimeoutSeconds, 1).Should(HaveLen(0))
73+
})
74+
75+
It("Should increment user error metric when patching a syntax error onto a correct policy", func() {
76+
hubApplyPolicy(policyName, correctPolicyFile)
77+
hubApplyPolicy(policyName, errPolicyFile)
78+
79+
By("Checking for the " + userMetricName + " metric on the template-sync controller")
80+
values := []string{}
81+
Eventually(func() []string {
82+
values = utils.GetMetrics(userMetricName, policyName)
83+
84+
return values
85+
}, defaultTimeoutSeconds, 1).Should(HaveLen(1))
86+
Expect(strconv.Atoi(values[0])).To(BeNumerically(">=", 1))
87+
})
88+
89+
It("Should not increment system error metric", func() {
90+
hubApplyPolicy(policyName, errPolicyFile)
91+
92+
By("Checking for the " + systemMetricName + " metric on the template-sync controller")
93+
values := []string{}
94+
Eventually(func() []string {
95+
values = utils.GetMetrics(systemMetricName, policyName)
96+
97+
return values
98+
}, defaultTimeoutSeconds, 1).Should(HaveLen(0))
99+
})
100+
})
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
apiVersion: policy.open-cluster-management.io/v1
2+
kind: Policy
3+
metadata:
4+
name: case21-policy
5+
labels:
6+
policy.open-cluster-management.io/cluster-name: managed
7+
policy.open-cluster-management.io/cluster-namespace: managed
8+
policy.open-cluster-management.io/root-policy: case21-policy
9+
spec:
10+
remediationAction: inform
11+
disabled: false
12+
policy-templates:
13+
- objectDefinition:
14+
apiVersion: policy.open-cluster-management.io/v1
15+
kind: ConfigurationPolicy
16+
metadata:
17+
name: case21-configpolicy
18+
spec:
19+
remediationAction: inform
20+
object-templates: false # syntax error
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
apiVersion: policy.open-cluster-management.io/v1
2+
kind: Policy
3+
metadata:
4+
name: case21-policy
5+
labels:
6+
policy.open-cluster-management.io/cluster-name: managed
7+
policy.open-cluster-management.io/cluster-namespace: managed
8+
policy.open-cluster-management.io/root-policy: case21-policy
9+
spec:
10+
remediationAction: inform
11+
disabled: false
12+
policy-templates:
13+
- objectDefinition:
14+
apiVersion: policy.open-cluster-management.io/v1
15+
kind: ConfigurationPolicy
16+
metadata:
17+
name: case21-configpolicy
18+
spec:
19+
remediationAction: inform
20+
object-templates:
21+
- complianceType: musthave
22+
objectDefinition:
23+
apiVersion: v1
24+
kind: Pod
25+
metadata:
26+
name: nginx-pod-e2e
27+
namespace: default
28+
spec:
29+
containers:
30+
- name: nginx

0 commit comments

Comments
 (0)