 #!/bin/bash

-set -e -u -o pipefail -x
+set -e -u -o pipefail -vx
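+# (-x traces each expanded command as it runs; adding -v also echoes input
+# lines as they are read, so comments and heredoc bodies appear in the log)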

 source scripts/config

 if ! grep -q "${kind_cluster_name}" <<< $(kind get clusters); then
   kind create cluster --name "${kind_cluster_name}"
 fi
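+# (the grep guard makes cluster creation idempotent across reruns; note the
+# unanchored match also hits cluster names that merely contain the target)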
|
10 | 10 |
|
11 |
| -mkdir -p .cache |
12 |
| -if [ ! -f .cache/kube-prometheus.yaml ]; then |
13 |
| - kustomize build https://github.com/prometheus-operator/kube-prometheus -o .cache/kube-prometheus.yaml |
+# Kustomize lacks support for caching remote bases
+# per https://github.com/kubernetes-sigs/kustomize/issues/1431
+mkdir -p .cache/kube-prometheus
+if [ ! -f .cache/kube-prometheus/kube-prometheus.yaml ]; then
+  kustomize build https://github.com/prometheus-operator/kube-prometheus -o .cache/kube-prometheus/kube-prometheus.yaml
 fi
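+# (the rendered manifest is cached indefinitely; delete
+# .cache/kube-prometheus/kube-prometheus.yaml to re-render from upstream)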
+if [ -f .cache/kube-prometheus/kustomization.yaml ]; then
+  rm .cache/kube-prometheus/kustomization.yaml
+fi
+(cd .cache/kube-prometheus && kustomize create && kustomize edit add resource kube-prometheus.yaml)
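+# (after this, .cache/kube-prometheus holds a minimal kustomization.yaml,
+# roughly:
+#   apiVersion: kustomize.config.k8s.io/v1beta1
+#   kind: Kustomization
+#   resources:
+#     - kube-prometheus.yaml
+# letting other kustomizations consume the cached render as a local base;
+# removing any stale kustomization.yaml first matters because
+# `kustomize create` refuses to overwrite an existing one)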

-# because kubectl apply still doesn't know how to wait for CRDs
-# before applying the rest...
-yq '.|select(.kind == "CustomResourceDefinition")' .cache/kube-prometheus.yaml | "${kubectl[@]}" apply --server-side -f -
-
-"${kubectl[@]}" wait --for=condition=established --timeout=60s crd --all
-
-yq '
-  .
-  |select(
-    (.kind != "CustomResourceDefinition")
-    and (.kind != "AlertManager")
-  )' .cache/kube-prometheus.yaml |\
-"${kubectl[@]}" apply --server-side --force-conflicts -f -
-
-# Scale down to 1 replica and don't deploy alertmanager, add resources,
-# enable some feature flags, and set a low GOMEMLIMIT
-"${kubectl[@]}" patch -n monitoring prometheus/k8s \
-  --type merge \
-  --patch-file /dev/stdin <<'__END__'
-apiVersion: monitoring.coreos.com/v1
-kind: Prometheus
-spec:
-  alerting:
-    alertmanagers: []
-  replicas: 1
-  # args to pass to the prometheus container, see kubectl explain prometheus.spec.additionalArgs
-  additionalArgs:
-    - name: web.enable-admin-api
-    - name: web.enable-remote-write-receiver
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#memory-snapshot-on-shutdown
-    # pointless when we aren't using a PV, but should move to it
-    # for BA anyway
-    - name: enable-feature
-      value: memory-snapshot-on-shutdown
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#extra-scrape-metrics
-    - name: enable-feature
-      value: extra-scrape-metrics
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#per-step-stats
-    - name: enable-feature
-      value: promql-per-step-stats
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomemlimit
-    - name: enable-feature
-      value: auto-gomemlimit
-    - name: auto-gomemlimit.ratio
-      # an unusually low GOMEMLIMIT is set here deliberately. We're using
-      # Prometheus to monitor Prometheus and don't really want to have to have 2
-      # separate instances, so one can be allowed to OOM. Plus we get better data
-      # when we can still scrape it even if it's over-limits. So we're going to
-      # set a really aggressive GOMEMLIMIT that makes it GC hard when it exceeds
-      # 25% of its container limit, giving it tons of headroom to balloon into
-      # without OOMing.
-      value: "0.25"
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomaxprocs
-    - name: enable-feature
-      value: auto-gomaxprocs
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#created-timestamps-zero-injection
-    - name: enable-feature
-      value: created-timestamp-zero-ingestion
-  # this is strategically merged by the operator with the default spec, see kubectl explain prometheus.spec.containers
-  containers:
-    - name: config-reloader
-      securityContext:
-        runAsNonRoot: true
-        runAsUser: 1000
-        allowPrivilegeEscalation: false
-        privileged: false
-        readOnlyRootFilesystem: true
-        capabilities:
-          drop:
-            - ALL
-    - name: prometheus
-      securityContext:
-        runAsNonRoot: true
-        runAsUser: 1000
-        allowPrivilegeEscalation: false
-        privileged: false
-        readOnlyRootFilesystem: true
-        capabilities:
-          drop:
-            - ALL
-      resources:
-        limits:
-          cpu: 1000m
-          # See the GOMEMLIMIT
-          memory: 2000Mi
-        requests:
-          cpu: 500m
-          memory: 2000Mi
-__END__
-
-
-"${kubectl[@]}" wait --for=condition=available --timeout=300s deployment --all -n monitoring
-"${kubectl[@]}" wait --for=condition=ready --timeout=300s pod --all -n monitoring
+kustomize build kubernetes/kube-prometheus | kapp deploy -a kube-prometheus -f - -y
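+# (kapp replaces the CRD-apply-then-wait choreography removed above: it
+# orders CRDs ahead of the resources that use them and waits for the app to
+# reconcile before exiting; -a names the tracked app, -y skips the
+# confirmation prompt. kubernetes/kube-prometheus is presumably the local
+# overlay that consumes the cached base built above)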

 # vim: et ts=2 sw=2 sts=2 ft=bash ai