Skip to content

Commit 864ee05

Browse files
committed
promtorture: deploy with kapp
1 parent 3a0ac77 commit 864ee05

File tree

7 files changed

+153
-179
lines changed

7 files changed

+153
-179
lines changed

testcases/promtorture/.dockerignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
scripts
2+
resources
23
promtorture
34
LICENSE
5+
README.md
46
Dockerfile
57
.cache
68
promtool
9+
kubernetes/
10+
notes.md
11+
promtorture-tmp-*
12+
promtorture-metrics-*
13+
.gitignore

testcases/promtorture/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ promtool
33
.cache
44
notes.md
55
promtorture-metrics-*
6+
promtorture-tmp-*

testcases/promtorture/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,16 @@
22

33
Make prometheus feel really bad. For science.
44

5+
## prerequisites
6+
7+
* [`go`](https://golang.org/)
8+
* [`kind`](https://kind.sigs.k8s.io/)
9+
* [`kapp`](https://get-kapp.io/)
10+
* [`kustomize`](https://kustomize.io/)
11+
* [`kubectl`](https://kubernetes.io/docs/tasks/tools/)
12+
* [`jq`](https://stedolan.github.io/jq/)
13+
* [`yq`](https://mikefarah.gitbook.io/yq/)
14+
515
## quickstart
616

717
```
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
apiVersion: kustomize.config.k8s.io/v1beta1
2+
kind: Kustomization
3+
metadata:
4+
annotations:
5+
config.kubernetes.io/local-config: "true"
6+
resources:
7+
# populated by scripts/kind-create.sh
8+
- ../../.cache/kube-prometheus
9+
patches:
10+
- target:
11+
kind: Prometheus
12+
name: k8s
13+
namespace: monitoring
14+
patch: |-
15+
apiVersion: monitoring.coreos.com/v1
16+
kind: Prometheus
17+
metadata:
18+
name: k8s
19+
namespace: monitoring
20+
spec:
21+
alerting:
22+
alertmanagers: []
23+
replicas: 1
24+
# args to pass to the prometheus container, see kubectl explain prometheus.spec.additionalArgs
25+
additionalArgs:
26+
- name: web.enable-admin-api
27+
- name: web.enable-remote-write-receiver
28+
# https://prometheus.io/docs/prometheus/latest/feature_flags/#memory-snapshot-on-shutdown
29+
# pointless when we aren't using a PV, but should move to it
30+
# for BA anyway
31+
- name: enable-feature
32+
value: memory-snapshot-on-shutdown
33+
# https://prometheus.io/docs/prometheus/latest/feature_flags/#extra-scrape-metrics
34+
- name: enable-feature
35+
value: extra-scrape-metrics
36+
# https://prometheus.io/docs/prometheus/latest/feature_flags/#per-step-stats
37+
- name: enable-feature
38+
value: promql-per-step-stats
39+
# https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomemlimit
40+
- name: enable-feature
41+
value: auto-gomemlimit
42+
- name: auto-gomemlimit.ratio
43+
# an unusually low GOMEMLIMIT is set here deliberately. We're using
44+
# Prometheus to monitor Prometheus and don't really want to have to have 2
45+
# separate instances, so one can be allowed to OOM. Plus we get better data
46+
# when we can still scrape it even if it's over-limits. So we're going to
47+
# set a really aggressive GOMEMLIMIT that makes it GC hard when it exceeds
48+
# 25% of its container limit, giving it tons of headroom to balloon into
49+
# without OOMing.
50+
value: "0.25"
51+
# https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomaxprocs
52+
- name: enable-feature
53+
value: auto-gomaxprocs
54+
# https://prometheus.io/docs/prometheus/latest/feature_flags/#created-timestamps-zero-injection
55+
- name: enable-feature
56+
value: created-timestamp-zero-ingestion
57+
# this is strategically merged by the operator with the default spec, see kubectl explain prometheus.spec.containers
58+
containers:
59+
- name: config-reloader
60+
securityContext:
61+
runAsNonRoot: true
62+
runAsUser: 1000
63+
allowPrivilegeEscalation: false
64+
privileged: false
65+
readOnlyRootFilesystem: true
66+
capabilities:
67+
drop:
68+
- ALL
69+
- name: prometheus
70+
securityContext:
71+
runAsNonRoot: true
72+
runAsUser: 1000
73+
allowPrivilegeEscalation: false
74+
privileged: false
75+
readOnlyRootFilesystem: true
76+
capabilities:
77+
drop:
78+
- ALL
79+
resources:
80+
limits:
81+
cpu: 1000m
82+
# See the GOMEMLIMIT
83+
memory: 2000Mi
84+
requests:
85+
cpu: 500m
86+
memory: 2000Mi

testcases/promtorture/scripts/grab-metrics.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set -e -u -o pipefail -x
1111
source scripts/config
1212

1313
tmpdir="promtorture-metrics-$(date -Isec)"
14+
mkdir "$tmpdir"
1415

1516
echo 1>&2 "Dumping metrics to $tmpdir"
1617

@@ -25,7 +26,9 @@ socks5_pid=$!
2526
# Because socks5-proxy is written in bash, this kills the script but not the
2627
# underlying kubectl port-forward process. We should fix this in the
2728
# socks5-proxy script. For now we'll find the child proc and kill it.
28-
trap 'kill $(pgrep -P ${socks5_pid}); kill ${socks5_pid}' EXIT
29+
# It should also wait with a timeout for the wrapper to exit, but bash's wait
30+
# lacks a timeout option...
31+
trap 'kill $(pgrep -P ${socks5_pid}); sleep 10; kill ${socks5_pid}' EXIT
2932
export http_proxy="socks5://${socks5_host}:${socks5_port}"
3033
while : ; do
3134
# wait for proxy to be ready by checking prometheus is reachable
Lines changed: 11 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,24 @@
11
#!/bin/bash
22

3-
set -e -u -o pipefail -x
3+
set -e -u -o pipefail -vx
44

55
source scripts/config
66

77
if ! grep -q "${kind_cluster_name}" <<< $(kind get clusters); then
88
kind create cluster --name "${kind_cluster_name}"
99
fi
1010

11-
mkdir -p .cache
12-
if [ ! -f .cache/kube-prometheus.yaml ]; then
13-
kustomize build https://github.com/prometheus-operator/kube-prometheus -o .cache/kube-prometheus.yaml
11+
# Kustomize lacks support for caching remote bases
12+
# per https://github.com/kubernetes-sigs/kustomize/issues/1431
13+
mkdir -p .cache/kube-prometheus
14+
if [ ! -f .cache/kube-prometheus/kube-prometheus.yaml ]; then
15+
kustomize build https://github.com/prometheus-operator/kube-prometheus -o .cache/kube-prometheus/kube-prometheus.yaml
1416
fi
17+
if [ -f .cache/kube-prometheus/kustomization.yaml ]; then
18+
rm .cache/kube-prometheus/kustomization.yaml
19+
fi
20+
(cd .cache/kube-prometheus && kustomize create && kustomize edit add resource kube-prometheus.yaml)
1521

16-
# because kubectl apply still doesn't know how to wait for CRDs
17-
# before applying the rest...
18-
yq '.|select(.kind == "CustomResourceDefinition")' .cache/kube-prometheus.yaml | "${kubectl[@]}" apply --server-side -f -
19-
20-
"${kubectl[@]}" wait --for=condition=established --timeout=60s crd --all
21-
22-
yq '
23-
.
24-
|select(
25-
(.kind != "CustomResourceDefinition")
26-
and (.kind != "AlertManager")
27-
)' .cache/kube-prometheus.yaml |\
28-
"${kubectl[@]}" apply --server-side --force-conflicts -f -
29-
30-
# Scale down to 1 replica and don't deploy alertmanager, add resources,
31-
# enable some feature flags, and set a low GOMEMLIMIT
32-
"${kubectl[@]}" patch -n monitoring prometheus/k8s \
33-
--type merge \
34-
--patch-file /dev/stdin <<'__END__'
35-
apiVersion: monitoring.coreos.com/v1
36-
kind: Prometheus
37-
spec:
38-
alerting:
39-
alertmanagers: []
40-
replicas: 1
41-
# args to pass to the prometheus container, see kubectl explain prometheus.spec.additionalArgs
42-
additionalArgs:
43-
- name: web.enable-admin-api
44-
- name: web.enable-remote-write-receiver
45-
# https://prometheus.io/docs/prometheus/latest/feature_flags/#memory-snapshot-on-shutdown
46-
# pointless when we aren't using a PV, but should move to it
47-
# for BA anyway
48-
- name: enable-feature
49-
value: memory-snapshot-on-shutdown
50-
# https://prometheus.io/docs/prometheus/latest/feature_flags/#extra-scrape-metrics
51-
- name: enable-feature
52-
value: extra-scrape-metrics
53-
# https://prometheus.io/docs/prometheus/latest/feature_flags/#per-step-stats
54-
- name: enable-feature
55-
value: promql-per-step-stats
56-
# https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomemlimit
57-
- name: enable-feature
58-
value: auto-gomemlimit
59-
- name: auto-gomemlimit.ratio
60-
# an unusually low GOMEMLIMIT is set here deliberately. We're using
61-
# Prometheus to monitor Prometheus and don't really want to have to have 2
62-
# separate instances, so one can be allowed to OOM. Plus we get better data
63-
# when we can still scrape it even if it's over-limits. So we're going to
64-
# set a really aggressive GOMEMLIMIT that makes it GC hard when it exceeds
65-
# 25% of its container limit, giving it tons of headroom to balloon into
66-
# without OOMing.
67-
value: "0.25"
68-
# https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomaxprocs
69-
- name: enable-feature
70-
value: auto-gomaxprocs
71-
# https://prometheus.io/docs/prometheus/latest/feature_flags/#created-timestamps-zero-injection
72-
- name: enable-feature
73-
value: created-timestamp-zero-ingestion
74-
# this is strategically merged by the operator with the default spec, see kubectl explain prometheus.spec.containers
75-
containers:
76-
- name: config-reloader
77-
securityContext:
78-
runAsNonRoot: true
79-
runAsUser: 1000
80-
allowPrivilegeEscalation: false
81-
privileged: false
82-
readOnlyRootFilesystem: true
83-
capabilities:
84-
drop:
85-
- ALL
86-
- name: prometheus
87-
securityContext:
88-
runAsNonRoot: true
89-
runAsUser: 1000
90-
allowPrivilegeEscalation: false
91-
privileged: false
92-
readOnlyRootFilesystem: true
93-
capabilities:
94-
drop:
95-
- ALL
96-
resources:
97-
limits:
98-
cpu: 1000m
99-
# See the GOMEMLIMIT
100-
memory: 2000Mi
101-
requests:
102-
cpu: 500m
103-
memory: 2000Mi
104-
__END__
105-
106-
107-
"${kubectl[@]}" wait --for=condition=available --timeout=300s deployment --all -n monitoring
108-
"${kubectl[@]}" wait --for=condition=ready --timeout=300s pod --all -n monitoring
22+
kustomize build kubernetes/kube-prometheus | kapp deploy -a kube-prometheus -f - -y
10923

11024
# vim: et ts=2 sw=2 sts=2 ft=bash ai

0 commit comments

Comments
 (0)