Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"--log-level=3",
"--v=5",
"--enable-operator-policy=true",
"--evaluation-backoff=1",
"--evaluation-backoff=2",
],
"env": {
"WATCH_NAMESPACE": "managed",
Expand All @@ -35,7 +35,7 @@
"--v=5",
"--enable-operator-policy=true",
"--target-kubeconfig-path=${workspaceFolder}/kubeconfig_managed2",
"--evaluation-backoff=1",
"--evaluation-backoff=2",
],
"env": {
"WATCH_NAMESPACE": "managed",
Expand Down
45 changes: 23 additions & 22 deletions controllers/configurationpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"encoding/base64"
"errors"
"fmt"
"math"
"reflect"
"regexp"
"sort"
Expand Down Expand Up @@ -193,6 +192,7 @@ type ConfigurationPolicyReconciler struct {
// The number of seconds before a policy is eligible for reevaluation in watch mode (throttles frequently evaluated
// policies)
EvalBackoffSeconds uint32
ItemLimiters *PerItemRateLimiter[reconcile.Request]
// lastEvaluatedCache contains the value of the last known ConfigurationPolicy resourceVersion per UID.
// This is a workaround to account for race conditions where the status is updated but the controller-runtime cache
// has not updated yet.
Expand All @@ -212,6 +212,18 @@ type ConfigurationPolicyReconciler struct {
// Reconcile is responsible for evaluating and rescheduling ConfigurationPolicy evaluations.
func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
log := log.WithValues("name", request.Name, "namespace", request.Namespace)

if r.ItemLimiters != nil {
limiter := r.ItemLimiters.GetLimiter(request)

// Check if a token is available; if so, `Allow` will spend it and return true
if limiter.Tokens() < 1.0 || !limiter.Allow() {
log.V(2).Info("Throttling policy evaluation")

return reconcile.Result{RequeueAfter: time.Second * time.Duration(r.EvalBackoffSeconds)}, nil
}
}

policy := &policyv1.ConfigurationPolicy{}

cleanup, err := r.cleanupImmediately()
Expand Down Expand Up @@ -469,13 +481,6 @@ func (r *ConfigurationPolicyReconciler) shouldEvaluatePolicy(
}
}

lastEvaluated, err := time.Parse(time.RFC3339, policy.Status.LastEvaluated)
if err != nil {
log.Error(err, "The policy has an invalid status.lastEvaluated value. Will evaluate it now.")

return true, 0
}

usesSelector := policy.Spec.NamespaceSelector.LabelSelector != nil ||
len(policy.Spec.NamespaceSelector.Include) != 0

Expand Down Expand Up @@ -508,20 +513,6 @@ func (r *ConfigurationPolicyReconciler) shouldEvaluatePolicy(
return false, 0

case errors.Is(getIntervalErr, policyv1.ErrIsWatch):
minNextEval := lastEvaluated.Add(time.Second * time.Duration(r.EvalBackoffSeconds))
durationLeft := minNextEval.Sub(now)

if durationLeft > 0 {
log.V(1).Info(
"The policy evaluation is configured for a watch event but rescheduling the evaluation due to the "+
"configured evaluation backoff",
"evaluationBackoffSeconds", r.EvalBackoffSeconds,
"remainingSeconds", math.Round(durationLeft.Seconds()),
)

return false, durationLeft
}

log.V(1).Info("The policy evaluation is configured for a watch event. Will evaluate now.")

return true, 0
Expand All @@ -536,6 +527,16 @@ func (r *ConfigurationPolicyReconciler) shouldEvaluatePolicy(
return true, 0
}

// At this point, we have a valid evaluation interval, we can now determine
// how long we need to wait (if at all).

lastEvaluated, err := time.Parse(time.RFC3339, policy.Status.LastEvaluated)
if err != nil {
log.Error(err, "The policy has an invalid status.lastEvaluated value. Will evaluate it now.")

return true, 0
}

nextEvaluation := lastEvaluated.Add(interval)
durationLeft := nextEvaluation.Sub(now)

Expand Down
44 changes: 44 additions & 0 deletions controllers/ratelimit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) 2025 Red Hat, Inc.
// Copyright Contributors to the Open Cluster Management project

package controllers

import (
"sync"
"time"

"golang.org/x/time/rate"
)

// NewPerItemRateLimiter returns a PerItemRateLimiter whose per-item limiters
// each refill one token every backoffSeconds seconds and hold at most burst
// tokens at a time.
func NewPerItemRateLimiter[T comparable](backoffSeconds uint32, burst int) *PerItemRateLimiter[T] {
	// The zero value of sync.RWMutex is ready to use, so only the remaining
	// fields need explicit initialization.
	limiter := PerItemRateLimiter[T]{
		limiters: make(map[T]*rate.Limiter),
		rate:     rate.Every(time.Duration(backoffSeconds) * time.Second),
		burst:    burst,
	}

	return &limiter
}

// PerItemRateLimiter maintains a separate token-bucket rate limiter per item,
// created lazily on first use. All limiters share the same refill rate and
// burst size. It is safe for concurrent use.
//
// NOTE(review): entries are only ever added, never removed, so the map grows
// with the number of distinct items seen over the process lifetime.
type PerItemRateLimiter[T comparable] struct {
	lock     sync.RWMutex        // guards limiters
	limiters map[T]*rate.Limiter // lazily-created limiter per item
	rate     rate.Limit          // refill rate applied to every new limiter
	burst    int                 // bucket size applied to every new limiter
}

// GetLimiter returns the rate limiter for the given item, creating and
// storing a new one on first use. It is safe for concurrent use and always
// returns the same limiter instance for the same item.
func (l *PerItemRateLimiter[T]) GetLimiter(item T) *rate.Limiter {
	// Fast path: the limiter already exists; only a read lock is needed.
	l.lock.RLock()
	limiter, exists := l.limiters[item]
	l.lock.RUnlock()

	if exists {
		return limiter
	}

	l.lock.Lock()
	defer l.lock.Unlock()

	// Re-check under the write lock: another goroutine may have created the
	// limiter between the read unlock above and acquiring the write lock.
	// Without this re-check, two concurrent callers could each create a
	// limiter for the same item, with the later write silently replacing the
	// earlier one in the map while the first caller keeps an orphaned
	// limiter — effectively granting extra burst tokens.
	if limiter, exists := l.limiters[item]; exists {
		return limiter
	}

	limiter = rate.NewLimiter(l.rate, l.burst)
	l.limiters[item] = limiter

	return limiter
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ require (
github.com/stretchr/testify v1.11.1
go.uber.org/zap v1.27.0
golang.org/x/mod v0.29.0
golang.org/x/time v0.14.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/api v0.33.5
k8s.io/apiextensions-apiserver v0.33.5
Expand Down Expand Up @@ -110,7 +111,6 @@ require (
golang.org/x/sys v0.37.0 // indirect
golang.org/x/term v0.35.0 // indirect
golang.org/x/text v0.29.0 // indirect
golang.org/x/time v0.14.0 // indirect
golang.org/x/tools v0.37.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
Expand Down
5 changes: 3 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ func main() {
EnableMetrics: opts.enableMetrics,
UninstallMode: beingUninstalled,
EvalBackoffSeconds: opts.evalBackoffSeconds,
ItemLimiters: controllers.NewPerItemRateLimiter[reconcile.Request](opts.evalBackoffSeconds, 1),
HubDynamicWatcher: configPolHubDynamicWatcher,
HubClient: hubClient,
ClusterName: opts.clusterName,
Expand Down Expand Up @@ -795,8 +796,8 @@ func parseOpts(flags *pflag.FlagSet, args []string) *ctrlOpts {
flags.Uint32Var(
&opts.evalBackoffSeconds,
"evaluation-backoff",
10,
"The number of seconds before a policy is eligible for reevaluation in watch mode (throttles frequently "+
5,
"The number of seconds before a policy is eligible for reevaluation (throttles frequently "+
"evaluated policies)",
)

Expand Down
4 changes: 2 additions & 2 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ func TestRunMain(t *testing.T) {
"--leader-elect=false",
fmt.Sprintf("--target-kubeconfig-path=%s", os.Getenv("TARGET_KUBECONFIG_PATH")),
"--log-level=1",
// Speed up the tests by not throttling the policy evaluations
"--evaluation-backoff=1",
// Speed up the tests by not throttling the policy evaluations very much
"--evaluation-backoff=2",
"--enable-operator-policy=true",
)

Expand Down
95 changes: 95 additions & 0 deletions test/e2e/case47_ratelimit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright (c) 2025 Red Hat, Inc.
// Copyright Contributors to the Open Cluster Management project

package e2e

import (
"fmt"
"strconv"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"

"open-cluster-management.io/config-policy-controller/test/utils"
)

// E2e test for per-policy evaluation rate limiting: a policy templates its
// managed ConfigMap from a watched ConfigMap, the watched ConfigMap is
// updated rapidly, and the evaluation-count metric is checked to confirm the
// evaluations were throttled. Ordered: the Its share mutable state (value)
// and must run in declaration order.
var _ = Describe("Test config policy ratelimiting", Ordered, func() {
	const (
		policyName    = "case47-cfgpolicy"
		policyYaml    = "../resources/case47_ratelimit/" + policyName + ".yaml"
		configMapName = "case47-configmap-to-watch"
		configMapYaml = "../resources/case47_ratelimit/" + configMapName + ".yaml"
		managedCMName = "case47-configmap-from-policy"
	)

	// metricCheck fetches the first sample of metricName matching the given
	// label/value pair and parses it as a float. The escaped quotes are part
	// of the filter string passed to utils.GetMetrics — presumably a regex
	// matching the Prometheus exposition format; TODO confirm against the
	// utils.GetMetrics implementation.
	metricCheck := func(metricName string, label string, value string) (float64, error) {
		metric := utils.GetMetrics(
			metricName, fmt.Sprintf(`%s=\"%s\"`, label, value))
		if len(metric) == 0 {
			return 0, fmt.Errorf("failed to retrieve any %s metric", metricName)
		}
		metricVal, err := strconv.ParseFloat(metric[0], 64)
		if err != nil {
			return 0, fmt.Errorf("error converting metric: %w", err)
		}

		return metricVal, nil
	}

	BeforeAll(func() {
		By("Creating " + policyYaml)
		utils.Kubectl("apply", "-f", policyYaml, "-n", testNamespace)
		By("Creating " + configMapYaml)
		utils.Kubectl("apply", "-f", configMapYaml) // The YAML specifies namespace "default"
	})

	It("should initially have a small number of evaluations", func() {
		// Eventually first so the metric exists, then Consistently to confirm
		// the count stays low while nothing is changing.
		Eventually(
			metricCheck, 10, 2,
		).WithArguments("config_policy_evaluation_total", "name", policyName).Should(BeNumerically("<", 4))

		Consistently(
			metricCheck, 10, 2,
		).WithArguments("config_policy_evaluation_total", "name", policyName).Should(BeNumerically("<", 4))
	})

	// Shared across the following Its: the last value written to the watched
	// ConfigMap, later verified on the policy-managed ConfigMap.
	value := 0

	It("should limit the number of evaluations when a watched object changes frequently", func() {
		start := time.Now()

		// ~60-70 patches over 10s; without rate limiting each would trigger
		// an evaluation, so the metric staying below 12 proves throttling.
		By("Updating the watched configmap frequently for 10 seconds")
		for start.Add(10 * time.Second).After(time.Now()) {
			value++
			utils.Kubectl("patch", "configmap", configMapName, "--type=json", "-p",
				`[{"op": "replace", "path": "/data/foo", "value": "`+strconv.Itoa(value)+`"}]`)
			time.Sleep(150 * time.Millisecond)
		}

		Consistently(
			metricCheck, 10, 2,
		).WithArguments("config_policy_evaluation_total", "name", policyName).Should(BeNumerically("<", 12))
	})

	It("should have updated the object to the final value", func() {
		// Throttling must only delay, not drop, the final evaluation: the
		// managed ConfigMap eventually reflects the last written value.
		By("Verifying the configmap has bar=" + strconv.Itoa(value))
		Eventually(func(g Gomega) {
			cm := utils.GetWithTimeout(clientManagedDynamic, gvrConfigMap,
				managedCMName, "default", true, defaultTimeoutSeconds)
			g.Expect(cm).NotTo(BeNil())

			val, found, err := unstructured.NestedString(cm.Object, "data", "bar")
			g.Expect(err).NotTo(HaveOccurred())
			g.Expect(found).To(BeTrue())
			g.Expect(val).Should(Equal(strconv.Itoa(value)))
		}, defaultTimeoutSeconds, 1).Should(Succeed())
	})

	AfterAll(func() {
		utils.KubectlDelete("-n", testNamespace, "-f", policyYaml)
		utils.KubectlDelete("-f", configMapYaml)
		utils.KubectlDelete("configmap", "-n", "default", "case47-configmap-from-policy")
	})
})
16 changes: 16 additions & 0 deletions test/resources/case47_ratelimit/case47-cfgpolicy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ConfigurationPolicy used by the case47 rate-limiting e2e test. It enforces a
# ConfigMap whose "bar" value is templated from the watched ConfigMap
# "case47-configmap-to-watch" in "default", so each change to the watched
# ConfigMap triggers a policy re-evaluation.
apiVersion: policy.open-cluster-management.io/v1
kind: ConfigurationPolicy
metadata:
  name: case47-cfgpolicy
spec:
  remediationAction: enforce
  object-templates:
    - complianceType: musthave
      objectDefinition:
        apiVersion: v1
        kind: ConfigMap
        metadata:
          name: case47-configmap-from-policy
          namespace: default
        data:
          # Copies the "foo" key of the watched ConfigMap into this "bar" key.
          bar: '{{ fromConfigMap "default" "case47-configmap-to-watch" "foo" }}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Watched ConfigMap for the case47 rate-limiting e2e test; the test patches
# its "foo" value rapidly to trigger frequent policy evaluations.
apiVersion: v1
kind: ConfigMap
metadata:
  name: case47-configmap-to-watch
  namespace: default
data:
  foo: "0"