5
5
#
6
6
# This should be in a real language not bash, but meh.
7
7
#
8
+ # Set env-var TSDB_DUMP=true to save a full TSDB dump locally if desired.
9
+ #
8
10
9
11
set -e -u -o pipefail -x
10
12
@@ -19,6 +21,13 @@ echo 1>&2 "Dumping metrics to $tmpdir"
19
21
# convenient way to record the promtorture invocation
20
22
# Write the invocation arguments to $tmpdir/args; no padding inside the
# quotes, otherwise the redirect targets a non-existent " <tmpdir> " directory.
echo "$@" > "$tmpdir/args"
21
23
24
+ promtorture_pod=" $( " ${kubectl[@]} " get pod -n default -l app=promtorture -o jsonpath=' {.items[0].metadata.name}' ) "
25
+
26
+ mkdir -p " ${tmpdir} /manifests"
27
+ " ${kubectl[@]} " get -n default deployment/promtorture -o yaml > " $tmpdir /manifests/promtorture-deployment.yaml"
28
+ " ${kubectl[@]} " get -n default pod/" ${promtorture_pod} " -o yaml > " $tmpdir /manifests/promtorture-pod.yaml"
29
+ " ${kubectl[@]} " get -n monitoring podmonitor/promtorture -o yaml > " $tmpdir /manifests/promtorture-podmonitor.yaml"
30
+
22
31
socks5_port=31121
23
32
socks5_host=localhost
24
33
# Start the proxy in the background on the same port that http_proxy is built
# from, so the two cannot drift apart if socks5_port is changed.
kubectl socks5-proxy -N socks5-proxy -p "${socks5_port}" &
@@ -28,17 +37,22 @@ socks5_pid=$!
28
37
# socks5-proxy script. For now we'll find the child proc and kill it.
29
38
# It should also wait with timeout for the wrapper to exit, but bash's wait
30
39
# lacks a timeout option...
31
- trap ' kill $(pgrep -P ${socks5_pid}); sleep 10; kill ${socks5_pid}' EXIT
40
+ trap ' kill $(pgrep -P ${socks5_pid}); sleep 10; kill ${socks5_pid} >&/dev/null ' EXIT
32
41
# Route curl through the local SOCKS5 proxy. The URL must contain no
# whitespace, or the proxy host/port cannot be parsed.
export http_proxy="socks5://${socks5_host}:${socks5_port}"
33
42
while : ; do
34
43
# wait for proxy to be ready by checking prometheus is reachable
35
- if curl -sL http://prometheus-k8s.monitoring.svc.cluster.local:9090 > /dev/null; then
44
+ if curl -sSL http://prometheus-k8s.monitoring.svc.cluster.local:9090 > /dev/null; then
36
45
break
37
46
fi
38
47
sleep 1
39
48
done
40
49
41
- promtorture_pod=" $( " ${kubectl[@]} " get pod -n default -l app=promtorture -o jsonpath=' {.items[0].metadata.name}' ) "
50
+ # Also grab the args from Prometheus, so we see what's really running.
51
+ # This will only be reliable if there's only one recent instance of
52
+ # promtorture running.
53
+ curl -sSL -G --data-urlencode ' query=scrape_duration_seconds{container="promtorture",job="monitoring/promtorture",arguments=~".+"}' \
54
+ " http://prometheus-k8s.monitoring.svc.cluster.local:9090/api/v1/query" \
55
+ | yq --prettyPrint ' .data.result[0].metric.arguments' > " $tmpdir /promtorture-args"
42
56
43
57
# metrics queries
44
58
# Directory for the per-query metric output files under $tmpdir.
mkdir "$tmpdir/metrics"
@@ -50,7 +64,7 @@ function instant_query_promtool() {
50
64
51
65
function instant_query_curl() {
52
66
echo ' # query: ' " $1 " > " $tmpdir /$2 "
53
- curl -sL -G --data-urlencode " query=$1 " " http://prometheus-k8s.monitoring.svc.cluster.local:9090/api/v1/query" | yq --prettyPrint .data >> " $tmpdir /$2 "
67
+ curl -sSL -G --data-urlencode " query=$1 " " http://prometheus-k8s.monitoring.svc.cluster.local:9090/api/v1/query" | yq --prettyPrint .data >> " $tmpdir /$2 "
54
68
}
55
69
56
70
function instant_query() {
@@ -112,10 +126,19 @@ snap_name="$(./scripts/promapi -m snapshot | yq .data.name)"
112
126
echo 1>&2 " Created tsdb snapshot ${snap_name} "
113
127
./scripts/promcmd du -ks " /prometheus/snapshots/${snap_name} " > " $tmpdir /tsdb-snapshot/size-kb"
114
128
mkdir " $tmpdir /tsdb-snapshot/raw"
115
- " ${kubectl[@]} " cp " monitoring/prometheus-k8s-0:/prometheus/snapshots/${snap_name} /" " ${tmpdir} /tsdb-snapshot/raw/"
129
+ # work around https://github.com/kubernetes/kubernetes/pull/78622
130
+ # and https://github.com/kubernetes/kubernetes/issues/77310 by cd'ing into the dir
131
+ # containing colons in the pathname, avoiding the error
132
+ # error: one of src or dest must be a local file specification
133
+ # when kubectl cp misinterprets the path as a remote spec.
134
+ (cd " ${tmpdir} " && " ${kubectl[@]} " cp " monitoring/prometheus-k8s-0:/prometheus/snapshots/${snap_name} /" " tsdb-snapshot/raw/" )
135
+
136
+ # promtool has some tsdb analytics that should tell us a lot
137
+ ./scripts/promcmd promtool tsdb analyze " /prometheus/snapshots/${snap_name} " --limit=100 > " $tmpdir /tsdb-snapshot/analyze-all"
138
+ ./scripts/promcmd promtool tsdb analyze " /prometheus/snapshots/${snap_name} " --limit=100 --match=' {job="monitoring/promtorture"}' > " $tmpdir /tsdb-snapshot/analyze-job-promtorture"
116
139
117
140
# metrics dump
118
- if [[ " ${TSDB_DUMP} " == " true" ]]; then
141
+ if [[ " ${TSDB_DUMP:- } " == " true" ]]; then
119
142
# we're dumping the tsdb snapshot here so we don't have to deal with running promtool's own
120
143
# dump and grabbing the file from the container FS.
121
144
#
@@ -130,4 +153,6 @@ if [[ "${TSDB_DUMP}" == "true" ]]; then
130
153
./scripts/promcmd promtool tsdb dump --sandbox-dir-root=" /prometheus/replay" " /prometheus/snapshots/${snap_name} " | ${compress} -c > " $tmpdir /tsdb-snapshot/tsdb-dump" ." ${compress} "
131
154
fi
132
155
156
+ echo 1>&2 " Results written to ${tmpdir} "
157
+
133
158
# vim: set ts=2 sw=2 et ai :
0 commit comments