Commit cba9722
promtorture: kind deployment and prometheus inspection
Some scripts to deploy kube-prometheus in kind, cut down to scrape just promtorture with a single replica. Adds resource limits and GOMEMLIMIT, plus scripts to query the TSDB, run promtool, etc.
1 parent dc1d719 commit cba9722

File tree

13 files changed: +86485 −24 lines

testcases/promtorture/.cache/kube-prometheus.yaml

Lines changed: 86240 additions & 0 deletions
Large diffs are not rendered by default.

testcases/promtorture/.dockerignore

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+scripts
+promtorture
+LICENSE
+Dockerfile
+.cache
+promtool

testcases/promtorture/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 promtorture
+promtool
+.cache

testcases/promtorture/scripts/config

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+#!/bin/bash
+kind_cluster_name=promtorture
+kubectl_context=("--context" "kind-${kind_cluster_name}")
+kubectl_bin="kubectl"
+kubectl=("${kubectl_bin}" "${kubectl_context[@]}")
Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# Grab promtool from the prometheus release bundle
+#
+# Can't be bothered getting latest with GH API
+
+set -e -u -o pipefail
+
+prom_version=2.53.1
+prom_target=linux-amd64
+
+curl -Lf1 --output-dir .cache -O https://github.com/prometheus/prometheus/releases/download/v${prom_version}/prometheus-${prom_version}.${prom_target}.tar.gz
+tar xf ./.cache/prometheus-${prom_version}.${prom_target}.tar.gz --strip-components=1 --wildcards '*/promtool'
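Once fetched, promtool is handy for local sanity checks; for example (an illustrative use, assuming the promtorture service is reachable on localhost:8080, e.g. via kubectl port-forward):

  # lint promtorture's exposition format against Prometheus best practices
  curl -s http://localhost:8080/metrics | ./promtool check metrics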

testcases/promtorture/scripts/kind-create.sh

old mode 100644
new mode 100755
Lines changed: 63 additions & 8 deletions
@@ -1,8 +1,10 @@
 #!/bin/bash
 
-kind_cluster_name=promtorture
+set -e -u -o pipefail -x
 
-if ! grep -q "${kind_cluster_name} " <<< $(kind get clusters); then
+source scripts/config
+
+if ! grep -q "${kind_cluster_name}" <<< $(kind get clusters); then
   kind create cluster --name "${kind_cluster_name}"
 fi
 
@@ -13,14 +15,66 @@ fi
 
 # because kubectl apply still doesn't know how to wait for CRDs
 # before applying the rest...
-yq '[.items[] | select(.kind == "CustomResourceDefinition")]' .cache/kube-prometheus.yaml | kubectl apply -f -
+yq '.|select(.kind == "CustomResourceDefinition")' .cache/kube-prometheus.yaml | "${kubectl[@]}" apply --server-side -f -
 
-# Sleeps suck, but we need to wait for the CRDs to be created and don't want to
-# overcomplicate this script by looping through kubectl api-resources and checking
-# for the CRDs we need.
-sleep 1
+"${kubectl[@]}" wait --for=condition=established --timeout=60s crd --all
 
-# this'll re-apply the CRDs but that's harmless
-kubectl apply -f .cache/kube-prometheus.yaml
+# Apply everything else.
+#
+# ServiceMonitors are omitted deliberately because for this test we only want to see
+# metrics for explicitly named targets.
+#
+yq '.|select((.kind != "CustomResourceDefinition") and (.kind != "ServiceMonitor"))' .cache/kube-prometheus.yaml | "${kubectl[@]}" apply --server-side -f -
+
+# Scale down to 1 replica and don't deploy alertmanager
+"${kubectl[@]}" patch -n monitoring prometheus k8s --type merge --patch-file /dev/stdin <<'__END__'
+apiVersion: monitoring.coreos.com/v1
+kind: Prometheus
+spec:
+  alerting:
+    alertmanagers: []
+  replicas: 1
+  containers: # strategically merged by the operator with the default spec; see kubectl explain prometheus.spec.containers
+    - name: config-reloader
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        allowPrivilegeEscalation: false
+        privileged: false
+        readOnlyRootFilesystem: true
+        capabilities:
+          drop:
+            - ALL
+    - name: prometheus
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        allowPrivilegeEscalation: false
+        privileged: false
+        readOnlyRootFilesystem: true
+        capabilities:
+          drop:
+            - ALL
+      env:
+        # Set GOMEMLIMIT to a high proportion of the container's memory
+        # limit, but not equal to it, so there's room for other processes,
+        # runtime overhead, error margins in different usage computation
+        # methods etc. We can refine this over time. I'm starting with 90%
+        # of the pod RAM limit since we also have a config reloader container
+        # etc. See https://pkg.go.dev/runtime
+        - name: GOMEMLIMIT
+          value: 450MiB
+      resources:
+        limits:
+          cpu: 1000m
+          memory: 500Mi
+        requests:
+          cpu: 200m
+          memory: 500Mi
+__END__
+
+"${kubectl[@]}" wait --for=condition=available --timeout=300s deployment --all -n monitoring
+"${kubectl[@]}" wait --for=condition=ready --timeout=300s pod --all -n monitoring
 
 # vim: et ts=2 sw=2 sts=2 ft=bash ai
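For the record, the 450MiB figure is just 90% of the 500Mi container limit; a quick illustrative sketch of the arithmetic (this helper is not part of the commit):

  #!/bin/bash
  # derive GOMEMLIMIT as ~90% of a container memory limit given in Mi,
  # leaving headroom for runtime overhead and the config-reloader sidecar
  limit_mi=500
  gomemlimit=$(( limit_mi * 90 / 100 ))MiB   # 500 * 90 / 100 = 450
  echo "GOMEMLIMIT=${gomemlimit}"            # -> GOMEMLIMIT=450MiB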

testcases/promtorture/scripts/kind-deploy.sh

old mode 100644
new mode 100755
Lines changed: 18 additions & 16 deletions
@@ -1,6 +1,8 @@
 #!/bin/bash
+#
+set -e -u -o pipefail -x
 
-kind_cluster_name=promtorture
+source scripts/config
 
 targets=1
 info_metrics_labels=0
@@ -29,9 +31,8 @@ docker buildx build -t promtorture .
 
 kind load docker-image promtorture --name promtorture
 
-kubectl=("kubectl", "--context", "kind-${kind_cluster_name}")
 
-"${kubectl[@]}" apply -f /dev/stdin <<__END__
+"${kubectl[@]}" apply --server-side -f /dev/stdin <<__END__
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -46,23 +47,23 @@ spec:
   template:
     metadata:
       labels:
-      app: promtorture
+        app: promtorture
     spec:
       containers:
       - name: promtorture
-       image: promtorture
-       imagePullPolicy: Never
-       ports:
-       - containerPort: 8080
-         name: metrics
-       args:
-       - "--port=8080"
-       - "--targets=${targets}"
-       - "--info-metrics-labels=${info_metrics_labels}"
-       - "--gauge-metrics=${gauge_metrics}"
+        image: promtorture
+        imagePullPolicy: Never
+        ports:
+          - containerPort: 8080
+            name: metrics
+        args:
+          - "--port=8080"
+          - "--targets=${targets}"
+          - "--info-metrics-labels=${info_metrics_labels}"
+          - "--gauge-metrics=${gauge_metrics}"
 __END__
 
-"${kubectl[@]}" apply -f /dev/stdin <<__END__
+"${kubectl[@]}" apply --server-side -f /dev/stdin <<__END__
 apiVersion: v1
 kind: Service
 metadata:
@@ -77,11 +78,12 @@ spec:
     name: metrics
 __END__
 
-"${kubectl[@]}" apply -f /dev/stdin <<__END__
+"${kubectl[@]}" apply --server-side -f /dev/stdin <<__END__
 kind: PodMonitor
 apiVersion: monitoring.coreos.com/v1
 metadata:
   name: promtorture
+  namespace: monitoring
   labels:
     app: promtorture
 spec:
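After a deploy it's worth confirming the operator actually picked the PodMonitor up, since it now has to live in the monitoring namespace; a quick check (the scrape-pool name follows prometheus-operator's podMonitor/<namespace>/<name>/<index> convention):

  # the PodMonitor must exist where Prometheus is watching
  kubectl --context kind-promtorture -n monitoring get podmonitor promtorture
  # then inspect the live scrape target via the meta helper in this commit
  scripts/meta -m target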

testcases/promtorture/scripts/meta

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+set -e -u -o pipefail
+
+function usage() {
+  echo "Usage: meta -m target|targetmeta|tsdb|walreplay|config|flags|runtime"
+  exit 1
+}
+
+# what to fetch
+meta=
+while getopts "m:h" opt; do
+  case ${opt} in
+    m)
+      meta=$OPTARG
+      ;;
+    \?|h|:)
+      usage
+      ;;
+  esac
+done
+
+if [ -z "${meta}" ]; then
+  usage
+fi
+
+function apiQuery() {
+  local endpoint
+  endpoint=$1
+  shift
+  curl -sL --socks5-hostname localhost:1081 -G "${@}" 'http://prometheus-k8s.monitoring.svc.cluster.local:9090'"${endpoint}" | yq --prettyPrint .
+}
+
+case "${meta}" in
+  target)
+    apiQuery "/api/v1/targets" --data 'scrapePool=podMonitor/monitoring/promtorture/0'
+    ;;
+  targetmeta)
+    apiQuery "/api/v1/targets/metadata" --data 'match_target={job="monitoring/promtorture"}'
+    ;;
+  tsdb)
+    apiQuery "/api/v1/status/tsdb"
+    ;;
+  walreplay)
+    apiQuery "/api/v1/status/walreplay"
+    ;;
+  config)
+    apiQuery "/api/v1/status/config"
+    ;;
+  flags)
+    apiQuery "/api/v1/status/flags"
+    ;;
+  runtime)
+    apiQuery "/api/v1/status/runtimeinfo"
+    ;;
+  *)
+    echo 1>&2 "Unrecognised option: ${meta}"
+    usage
+    ;;
+esac
+
+# vim: set ft=sh et ai sw=2 ts=2 sts=2:
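apiQuery assumes a SOCKS5 proxy on localhost:1081 that can resolve in-cluster service DNS names; the commit doesn't show where that proxy comes from. One way to provide it (an assumption, not part of this change; the serjs/go-socks5-proxy image listens on 1080):

  # run a throwaway SOCKS5 proxy pod and forward local port 1081 to it
  kubectl --context kind-promtorture -n monitoring run socks5 --image=serjs/go-socks5-proxy
  kubectl --context kind-promtorture -n monitoring wait --for=condition=ready pod/socks5
  kubectl --context kind-promtorture -n monitoring port-forward pod/socks5 1081:1080 &

  # then e.g. dump head/cardinality stats:
  scripts/meta -m tsdb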
Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -e -u -o pipefail
+
+source scripts/config
+
+exec "${kubectl_bin}" stern "${kubectl_context[@]}" -n monitoring -l app.kubernetes.io/instance=k8s,app.kubernetes.io/name=prometheus "$@"
Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Delete the prom pods, forcing their ephemeral storage to be deleted
+# and TSDB heads to be pruned
+#
+
+set -e -u -o pipefail
+
+source scripts/config
+
+if ! "${kubectl[@]}" delete pod -n monitoring -l app.kubernetes.io/instance=k8s,app.kubernetes.io/name=prometheus --grace-period=5 --timeout=10s
+then
+  "${kubectl[@]}" delete pod -n monitoring -l app.kubernetes.io/instance=k8s,app.kubernetes.io/name=prometheus --force
+fi
+sleep 1
+echo 1>&2 "waiting for statefulset"
+"${kubectl[@]}" wait --v 1 -n monitoring statefulset/prometheus-k8s --for=jsonpath='{.status.availableReplicas}=1' --timeout=30s
+echo 1>&2 "waiting for pod ready"
+"${kubectl[@]}" wait --v 1 -n monitoring --for=condition=ready pod -l app.kubernetes.io/instance=k8s,app.kubernetes.io/name=prometheus --timeout=30s
