diff --git a/Makefile b/Makefile index 314f6b1a..a448ea31 100644 --- a/Makefile +++ b/Makefile @@ -82,10 +82,6 @@ test: ## Run unit tests. test-integration: $(SETUP_ENVTEST) ## Run integration tests. ./hack/test-integration.sh ./test/integration/... -.PHONY: test-kyverno -test-kyverno: $(KYVERNO) ## Run kyverno policy tests. - $(KYVERNO) test --remove-color -v 4 . - .PHONY: test-e2e test-e2e: $(GINKGO) ## Run e2e tests. ./hack/test-e2e.sh $(GINKGO_FLAGS) ./test/e2e/... ./webhosting-operator/test/e2e/... @@ -102,7 +98,7 @@ lint: $(GOLANGCI_LINT) ## Run golangci-lint against code. $(GOLANGCI_LINT) run ./... ./webhosting-operator/... .PHONY: check -check: lint test test-integration test-kyverno ## Check everything (lint + test + test-integration + test-kyverno). +check: lint test test-integration ## Check everything (lint + test + test-integration). .PHONY: verify-fmt verify-fmt: fmt ## Verify go code is formatted. diff --git a/docs/evaluation.md b/docs/evaluation.md index 4be3042c..5d5a0738 100644 --- a/docs/evaluation.md +++ b/docs/evaluation.md @@ -124,7 +124,8 @@ In addition to the described components, [kyverno](https://github.com/kyverno/ky In the cluster itself, kyverno policies are used for scheduling the sharder and webhosting-operator to the dedicated `sharding` worker pool and experiment to the dedicated `experiment` worker pool. This makes sure that these components run on machines isolated from other system components and don't content for compute resources during load tests. -Furthermore, kyverno policies are added to the control plane to ensure a static size of etcd, kube-apiserver, and kube-controller-manager (requests=limits for guaranteed resources, disable vertical autoscaling, 4 replicas of kube-apiserver to disable horizontal autoscaling) and schedule them to a dedicated worker pool using a non-overcommit flavor with more CPU cores per machine. 
+Furthermore, kyverno policies are added to the control plane to ensure a static size of etcd, kube-apiserver, and kube-controller-manager (requests=limits for guaranteed resources, vertical autoscaling disabled, and kube-apiserver pinned to 4 replicas, which effectively disables horizontal autoscaling). +Also, kube-controller-manager's client-side rate limiting is disabled (ref https://github.com/timebertt/kubernetes-controller-sharding/pull/610, [SIG api-machinery recommendation](https://kubernetes.slack.com/archives/C0EG7JC6T/p1680889646346859?thread_ts=1680791299.631439&cid=C0EG7JC6T)) and its HTTP/2 usage is disabled so that its API requests are distributed across API server instances (ref https://github.com/gardener/gardener/issues/8810). This is done to make load test experiments more stable and their results more reproducible. ## Measurements diff --git a/hack/config/policy/controlplane/etcd-main.yaml b/hack/config/policy/controlplane/etcd-main.yaml index 70a9e5fc..98915562 100644 --- a/hack/config/policy/controlplane/etcd-main.yaml +++ b/hack/config/policy/controlplane/etcd-main.yaml @@ -2,7 +2,7 @@ apiVersion: kyverno.io/v1 kind: Policy metadata: name: etcd-main - namespace: shoot--timebertt--sharding + namespace: shoot--ixywdlfvei--sharding spec: failurePolicy: Fail rules: @@ -15,8 +15,7 @@ spec: - Pod selector: matchLabels: - instance: etcd-main - name: etcd + app.kubernetes.io/name: etcd-main mutate: patchStrategicMerge: spec: @@ -33,22 +32,16 @@ spec: env: - name: GOMAXPROCS value: "12" - # schedule etcd-main on high-cpu worker pool for stable performance - - name: add-scheduling-constraints + - name: disable-vpa match: any: - resources: kinds: - - Pod - selector: - matchLabels: - instance: etcd-main - name: etcd + - VerticalPodAutoscaler + names: + - etcd-main mutate: - patchesJson6902: |- - - op: add - path: "/spec/tolerations/-" - value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"} - - op: replace - path: 
"/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms" - value: [{"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}] + patchStrategicMerge: + spec: + updatePolicy: + updateMode: "Off" # quoted: a bare Off is a YAML 1.1 boolean (false) diff --git a/hack/config/policy/controlplane/kube-apiserver-scale.yaml b/hack/config/policy/controlplane/kube-apiserver-scale.yaml deleted file mode 100644 index 7468ff71..00000000 --- a/hack/config/policy/controlplane/kube-apiserver-scale.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: kyverno.io/v1 -kind: Policy -metadata: - name: kube-apiserver-scale - namespace: shoot--timebertt--sharding -spec: - failurePolicy: Ignore - # schema validation doesn't seem to work in combination with the /scale subresource, disable it for now - schemaValidation: false - rules: - # set static replicas on kube-apiserver to ensure similar evaluation environment between load test runs - - name: replicas-scale - match: - any: - - resources: - # mutate scale requests by HPA - kinds: - - Deployment/scale - # the Scale subresource doesn't have the original resource's labels -> we have to match by name - names: - - kube-apiserver - preconditions: - all: - # Only patch spec.replicas if the control plane is not hibernated, i.e., if spec.replicas>=1. 
- - key: "{{ request.object.spec.replicas || `1` }}" - operator: GreaterThan - value: 0 - mutate: - patchStrategicMerge: - spec: - replicas: 4 diff --git a/hack/config/policy/controlplane/kube-apiserver.yaml b/hack/config/policy/controlplane/kube-apiserver.yaml index b450dfdd..b9c193fa 100644 --- a/hack/config/policy/controlplane/kube-apiserver.yaml +++ b/hack/config/policy/controlplane/kube-apiserver.yaml @@ -2,34 +2,24 @@ apiVersion: kyverno.io/v1 kind: Policy metadata: name: kube-apiserver - namespace: shoot--timebertt--sharding + namespace: shoot--ixywdlfvei--sharding spec: failurePolicy: Fail rules: # set static replicas on kube-apiserver to ensure similar evaluation environment between load test runs - # if the cluster is hibernated (spec.replicas=0), this rule is skipped - - name: replicas + - name: disable-hpa match: any: - resources: kinds: - - Deployment - selector: - matchLabels: - app: kubernetes - role: apiserver - preconditions: - all: - # Only patch spec.replicas if the control plane is not hibernated, i.e., if spec.replicas>=1. - # NB: gardenlet deploys kube-apiserver with spec.replicas=null which is defaulted after the policy webhook call - # to spec.replicas=1. Hence, treat spec.replicas=null the same way as spec.replicas=1. 
- key: "{{ request.object.spec.replicas || `1` }}" - operator: GreaterThan - value: 0 + - HorizontalPodAutoscaler + names: + - kube-apiserver mutate: patchStrategicMerge: spec: - replicas: 4 + minReplicas: 4 + maxReplicas: 4 # set static requests/limits on kube-apiserver to ensure similar evaluation environment between load test runs - name: resources match: @@ -57,22 +47,16 @@ spec: env: - name: GOMAXPROCS value: "12" - # schedule kube-apiserver on high-cpu worker pool for stable performance - - name: add-scheduling-constraints + - name: disable-vpa match: any: - resources: kinds: - - Pod - selector: - matchLabels: - app: kubernetes - role: apiserver + - VerticalPodAutoscaler + names: + - kube-apiserver-vpa mutate: - patchesJson6902: |- - - op: add - path: "/spec/tolerations/-" - value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"} - - op: add - path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms" - value: [{"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}] + patchStrategicMerge: + spec: + updatePolicy: + updateMode: "Off" # quoted: a bare Off is a YAML 1.1 boolean (false) diff --git a/hack/config/policy/controlplane/kube-controller-manager.yaml b/hack/config/policy/controlplane/kube-controller-manager.yaml index 7242b01c..b2b529e1 100644 --- a/hack/config/policy/controlplane/kube-controller-manager.yaml +++ b/hack/config/policy/controlplane/kube-controller-manager.yaml @@ -2,7 +2,7 @@ apiVersion: kyverno.io/v1 kind: Policy metadata: name: kube-controller-manager - namespace: shoot--timebertt--sharding + namespace: shoot--ixywdlfvei--sharding spec: failurePolicy: Ignore rules: @@ -46,27 +46,8 @@ spec: spec: updatePolicy: updateMode: Off - # schedule kube-controller-manager on high-cpu worker pool for stable performance - - name: add-scheduling-constraints - match: - any: - - resources: - kinds: - - Pod - selector: - matchLabels: - app: kubernetes - role: controller-manager - mutate: - patchesJson6902: |- - - op: 
add - path: "/spec/tolerations/-" - value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"} - - op: add - path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms" - value: [{"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}] - # increases kube-controller-manager's client-side rate limits to speed up garbage collection after executing load tests - - name: increase-rate-limits + # disable kube-controller-manager's client-side rate limits similar to webhosting-operator + - name: disable-rate-limits match: any: - resources: @@ -78,10 +59,7 @@ spec: patchesJson6902: |- - op: add path: /spec/template/spec/containers/0/command/- - value: "--kube-api-qps=2000" - - op: add - path: /spec/template/spec/containers/0/command/- - value: "--kube-api-burst=2200" + value: "--kube-api-qps=-1" # disable HTTP2 in kube-controller-manager's so that API requests are distributed across API server instances - name: disable-http2 match: diff --git a/hack/config/policy/controlplane/kustomization.yaml b/hack/config/policy/controlplane/kustomization.yaml index f854e40e..21c371e3 100644 --- a/hack/config/policy/controlplane/kustomization.yaml +++ b/hack/config/policy/controlplane/kustomization.yaml @@ -7,5 +7,4 @@ kind: Kustomization resources: - etcd-main.yaml - kube-apiserver.yaml -- kube-apiserver-scale.yaml - kube-controller-manager.yaml diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/kyverno-test.yaml b/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/kyverno-test.yaml deleted file mode 100644 index f89169d3..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/kyverno-test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: cli.kyverno.io/v1alpha1 -kind: Test -metadata: - name: kube-apiserver-scale-awake -policies: -- ../../kube-apiserver-scale.yaml -resources: -# spec.replicas=2 -> expect spec.replicas=4 -- scale.yaml 
-variables: variables.yaml -results: -- policy: shoot--timebertt--sharding/kube-apiserver-scale - rule: replicas-scale - resources: - - shoot--timebertt--sharding/kube-apiserver - kind: Scale - result: pass - patchedResources: scale_expected.yaml diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale.yaml b/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale.yaml deleted file mode 100644 index 69ae402c..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Scale -apiVersion: autoscaling/v1 -metadata: - name: kube-apiserver - namespace: shoot--timebertt--sharding -spec: - replicas: 2 diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale_expected.yaml b/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale_expected.yaml deleted file mode 100644 index 817f8bb9..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale_expected.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Scale -apiVersion: autoscaling/v1 -metadata: - name: kube-apiserver - namespace: shoot--timebertt--sharding -spec: - replicas: 4 diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/variables.yaml b/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/variables.yaml deleted file mode 100644 index f16b8b5a..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-awake/variables.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: cli.kyverno.io/v1alpha1 -kind: Values -metadata: - name: values -subresources: -- subresource: - name: "deployments/scale" - kind: "Scale" - group: "autoscaling" - version: "v1" - parentResource: - name: "deployments" - kind: "Deployment" - group: "apps" - version: "v1" diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/kyverno-test.yaml 
b/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/kyverno-test.yaml deleted file mode 100644 index bb352093..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/kyverno-test.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: cli.kyverno.io/v1alpha1 -kind: Test -metadata: - name: kube-apiserver-scale-hibernated -policies: -- ../../kube-apiserver-scale.yaml -resources: -# spec.replicas=0 -> expect skip -- scale.yaml -variables: variables.yaml -results: -- policy: shoot--timebertt--sharding/kube-apiserver-scale - rule: replicas-scale - resources: - - shoot--timebertt--sharding/kube-apiserver - kind: Scale - result: skip diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/scale.yaml b/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/scale.yaml deleted file mode 100644 index 3e7b0d7d..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/scale.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Scale -apiVersion: autoscaling/v1 -metadata: - name: kube-apiserver - namespace: shoot--timebertt--sharding -spec: - replicas: 0 diff --git a/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/variables.yaml b/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/variables.yaml deleted file mode 100644 index f16b8b5a..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/variables.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: cli.kyverno.io/v1alpha1 -kind: Values -metadata: - name: values -subresources: -- subresource: - name: "deployments/scale" - kind: "Scale" - group: "autoscaling" - version: "v1" - parentResource: - name: "deployments" - kind: "Deployment" - group: "apps" - version: "v1" diff --git a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake.yaml b/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake.yaml deleted file mode 100644 
index 4eda5fa0..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: kubernetes - role: apiserver - name: kube-apiserver-awake - namespace: shoot--timebertt--sharding -spec: - replicas: 2 - template: - spec: - containers: - - name: kube-apiserver - resources: - requests: - cpu: 800m - memory: 800Mi diff --git a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake_expected.yaml b/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake_expected.yaml deleted file mode 100644 index 86097fee..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake_expected.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: kubernetes - role: apiserver - name: kube-apiserver-awake - namespace: shoot--timebertt--sharding -spec: - replicas: 4 - template: - spec: - containers: - - name: kube-apiserver - resources: - requests: - cpu: 800m - memory: 800Mi diff --git a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-hibernated.yaml b/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-hibernated.yaml deleted file mode 100644 index 38727f81..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-hibernated.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: kubernetes - role: apiserver - name: kube-apiserver-hibernated - namespace: shoot--timebertt--sharding -spec: - replicas: 0 - template: - spec: - containers: - - name: kube-apiserver - resources: - requests: - cpu: 800m - memory: 800Mi diff --git a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null.yaml b/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null.yaml deleted file mode 100644 index eaa26cb2..00000000 --- 
a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: kubernetes - role: apiserver - name: kube-apiserver-null - namespace: shoot--timebertt--sharding -spec: - template: - spec: - containers: - - name: kube-apiserver - resources: - requests: - cpu: 800m - memory: 800Mi diff --git a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null_expected.yaml b/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null_expected.yaml deleted file mode 100644 index b61d5fa1..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null_expected.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: kubernetes - role: apiserver - name: kube-apiserver-null - namespace: shoot--timebertt--sharding -spec: - replicas: 4 - template: - spec: - containers: - - name: kube-apiserver - resources: - requests: - cpu: 800m - memory: 800Mi diff --git a/hack/config/policy/controlplane/tests/kube-apiserver/kyverno-test.yaml b/hack/config/policy/controlplane/tests/kube-apiserver/kyverno-test.yaml deleted file mode 100644 index 148c2606..00000000 --- a/hack/config/policy/controlplane/tests/kube-apiserver/kyverno-test.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: cli.kyverno.io/v1alpha1 -kind: Test -metadata: - name: kube-apiserver -policies: -- ../../kube-apiserver.yaml -resources: -# spec.replicas=2 -> expect spec.replicas=4 -- kube-apiserver-awake.yaml -# spec.replicas=null -> expect spec.replicas=4 -- kube-apiserver-null.yaml -# spec.replicas=0 -> expect skip -- kube-apiserver-hibernated.yaml -results: -- policy: shoot--timebertt--sharding/kube-apiserver - rule: replicas - resources: - - shoot--timebertt--sharding/kube-apiserver-awake - kind: Deployment - result: pass - patchedResources: kube-apiserver-awake_expected.yaml -- policy: 
shoot--timebertt--sharding/kube-apiserver - rule: replicas - resources: - - shoot--timebertt--sharding/kube-apiserver-null - kind: Deployment - result: pass - patchedResources: kube-apiserver-null_expected.yaml -- policy: shoot--timebertt--sharding/kube-apiserver - rule: replicas - resources: - - shoot--timebertt--sharding/kube-apiserver-hibernated - kind: Deployment - result: skip diff --git a/hack/tools.mk b/hack/tools.mk index 869992bb..d1320de9 100644 --- a/hack/tools.mk +++ b/hack/tools.mk @@ -55,13 +55,6 @@ $(KUBECTL): $(call tool_version_file,$(KUBECTL),$(KUBECTL_VERSION)) curl -Lo $(KUBECTL) https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/$(shell uname -s | tr '[:upper:]' '[:lower:]')/$(shell uname -m | sed 's/x86_64/amd64/')/kubectl chmod +x $(KUBECTL) -KYVERNO := $(TOOLS_BIN_DIR)/kyverno -# renovate: datasource=github-releases depName=kyverno/kyverno -KYVERNO_VERSION ?= v1.15.1 -$(KYVERNO): $(call tool_version_file,$(KYVERNO),$(KYVERNO_VERSION)) - curl -Lo - https://github.com/kyverno/kyverno/releases/download/$(KYVERNO_VERSION)/kyverno-cli_$(KYVERNO_VERSION)_$(shell uname -s | tr '[:upper:]' '[:lower:]')_$(shell uname -m | sed 's/aarch64/arm64/').tar.gz | tar -xzmf - -C $(TOOLS_BIN_DIR) kyverno - chmod +x $(KYVERNO) - SETUP_ENVTEST := $(TOOLS_BIN_DIR)/setup-envtest CONTROLLER_RUNTIME_VERSION ?= $(call version_gomod,sigs.k8s.io/controller-runtime) $(SETUP_ENVTEST): $(call tool_version_file,$(SETUP_ENVTEST),$(CONTROLLER_RUNTIME_VERSION))