Skip to content

Commit fbed217

Browse files
committed
promtorture: scripts to grab the metrics
1 parent 36b0127 commit fbed217

File tree

5 files changed

+167
-4
lines changed

5 files changed

+167
-4
lines changed

testcases/promtorture/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ promtorture
22
promtool
33
.cache
44
notes.md
5+
promtorture-metrics-*

testcases/promtorture/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ go build
2828
## scripts
2929

3030
- [`kind-create.sh`](./scripts/kind-create.sh): Create a kind cluster and deploy kube-prometheus
31+
- [`clean-collect-promtorture`](./scripts/clean-collect-promtorture): Delete prometheus data, deploy promtorture with specified args, and collect various prom stats and metrics using `script/grab-metrics.sh`
32+
- [`scripts/grab-metrics.sh`](./scripts/grab-metrics.sh): Grab various data about prometheus's memory use, storage, the current promtorture workload scrapes, etc. Collects a tsdb snapshot.
3133
- [`kind-deploy.sh`](./scripts/kind-deploy.sh): Deploy promtorture, takes CLI options for target and label counts
3234
- [`promapi`](./scripts/promapi): A simple prometheus API client, see
3335
`./scripts/promapi -h`. Can get TSDB info, make snapshots, etc.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
#
# Deploy a promtorture with the supplied arguments, let it run, and grab some
# prometheus metrics + a tsdb snapshot and a metrics dump from it.
#
# All arguments are passed through to kind-deploy.sh and grab-metrics.sh.
#
# Optional env:
#   SCRAPE_WAIT - seconds to wait for scrapes before collecting (default 60)
#

set -e -u -o pipefail

source scripts/config

# Remove any existing promtorture deployment for a clean run; a failure here
# almost certainly just means there was nothing to delete, so carry on.
if ! "${kubectl[@]}" delete deployment promtorture; then
  echo 1>&2 "Failed to delete promtorture deployment, presumed nonexistent"
fi

# Nuke the prometheus for a clean slate
./scripts/promnuke

# Deploy the promtorture job
./scripts/kind-deploy.sh "$@"

# Wait for some scrapes to accumulate before measuring anything
sleep "${SCRAPE_WAIT:-60}"

# Grab metrics, snapshot etc; grab-metrics.sh records "$@" in its output dir
# so the promtorture invocation is kept alongside the results.
./scripts/grab-metrics.sh "$@"

# vim: set ts=2 sw=2 et ai :
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/bin/bash
#
# Grab some prometheus metrics + a tsdb snapshot and a metrics dump from
# the prometheus server running in the kind cluster.
#
# This should be in a real language not bash, but meh.
#
# Optional env:
#   TSDB_DUMP=true - also dump the tsdb snapshot contents with promtool
#

set -e -u -o pipefail -x

source scripts/config

tmpdir="promtorture-metrics-$(date -Isec)"
# Create the output dir up-front; everything below writes into it.
mkdir -p "$tmpdir"

echo 1>&2 "Dumping metrics to $tmpdir"

# arguments passed to this script just get dumped to the tempdir, so it's a
# convenient way to record the promtorture invocation
echo "$@" > "$tmpdir/args"

socks5_port=31121
socks5_host=localhost
kubectl socks5-proxy -N socks5-proxy -p "${socks5_port}" &
socks5_pid=$!
# Because socks5-proxy is written in bash this kills the script but not the
# underlying kubectl port-forward process. We should fix this in the
# socks5-proxy script. For now we'll find the child proc and kill it.
trap 'kill $(pgrep -P ${socks5_pid}); kill ${socks5_pid}' EXIT
export http_proxy="socks5://${socks5_host}:${socks5_port}"
# Wait for the proxy to be ready by checking prometheus is reachable, but
# give up after a minute rather than spinning forever.
proxy_tries=0
while : ; do
  if curl -sL http://prometheus-k8s.monitoring.svc.cluster.local:9090 > /dev/null; then
    break
  fi
  proxy_tries=$((proxy_tries + 1))
  if [[ "${proxy_tries}" -ge 60 ]]; then
    echo 1>&2 "Timed out waiting for the socks5 proxy / prometheus to become reachable"
    exit 1
  fi
  sleep 1
done

promtorture_pod="$("${kubectl[@]}" get pod -n default -l app=promtorture -o jsonpath='{.items[0].metadata.name}')"

# metrics queries
mkdir "$tmpdir/metrics"

# Run an instant query ($1) with promtool inside the prometheus container,
# writing the query and its result to "$tmpdir/$2".
function instant_query_promtool() {
  echo '# query: ' "$1" > "$tmpdir/$2"
  ./scripts/promcmd promtool query instant http://localhost:9090 "$1" >> "$tmpdir/$2"
}

# Run an instant query ($1) via the HTTP API (through the socks proxy),
# writing the query and the prettified .data payload to "$tmpdir/$2".
function instant_query_curl() {
  echo '# query: ' "$1" > "$tmpdir/$2"
  curl -sL -G --data-urlencode "query=$1" "http://prometheus-k8s.monitoring.svc.cluster.local:9090/api/v1/query" | yq --prettyPrint .data >> "$tmpdir/$2"
}

# Run $1 both ways: "$2".yaml via the API and "$2".prom via promtool.
function instant_query() {
  instant_query_curl "$1" "${2}.yaml"
  instant_query_promtool "$1" "${2}.prom"
}


# Joined onto the promtorture scrape queries to tag results with the
# promtorture workload parameters (gauge_metrics, info_labels, targets).
promtorture_label_join=' * on (job,container,pod) group_left(gauge_metrics,info_labels,targets) (promtorture_info{job="monitoring/promtorture",container="promtorture"})'

declare -A queries
queries[prometheus_tsdb_head_series]='increase(prometheus_tsdb_head_series{job="prometheus-k8s",container="prometheus"}[5m])'
queries[prometheus_tsdb_head_chunks]='prometheus_tsdb_head_chunks{job="prometheus-k8s",container="prometheus"}'
queries[delta_prometheus_tsdb_head_chunks]='delta(prometheus_tsdb_head_chunks{job="prometheus-k8s",container="prometheus"}[5m])'
queries[prometheus_tsdb_head_chunks_created_total]='prometheus_tsdb_head_chunks_created_total{job="prometheus-k8s",container="prometheus"}'
queries[prometheus_tsdb_head_chunks_storage_size_bytes]='prometheus_tsdb_head_chunks_storage_size_bytes{job="prometheus-k8s",container="prometheus"}'
queries[prometheus_tsdb_storage_blocks_bytes]='prometheus_tsdb_storage_blocks_bytes{job="prometheus-k8s",container="prometheus"}'
queries[process_resident_memory_bytes]='process_resident_memory_bytes{job="prometheus-k8s",container="prometheus"}'
queries[go_gc_gomemlimit_bytes]='go_gc_gomemlimit_bytes{job="prometheus-k8s",container="prometheus"}'
queries[prometheus_tsdb_symbol_table_size_bytes]='prometheus_tsdb_symbol_table_size_bytes{job="prometheus-k8s",container="prometheus"}'
queries[container_memory_working_set_bytes]='container_memory_working_set_bytes{container="prometheus",pod="prometheus-k8s-0"}'
# NOTE(review): key says head_series but the query is target_metadata_cache_bytes
# — looks like a copy/paste slip; kept as-is to preserve output filenames. Confirm intent.
queries[sum_prometheus_tsdb_head_series]='sum by () (prometheus_target_metadata_cache_bytes{job="prometheus-k8s",container="prometheus"})'
queries[promtorture_max_scrape_duration_seconds]='max(scrape_duration_seconds{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"})'"${promtorture_label_join}"
queries[promtorture_avg_scrape_duration_seconds]='avg(scrape_duration_seconds{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"})'"${promtorture_label_join}"
queries[promtorture_scrape_samples_scraped]='scrape_samples_scraped{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"}'"${promtorture_label_join}"
queries[promtorture_scrape_body_size_bytes]='scrape_body_size_bytes{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"}'"${promtorture_label_join}"
queries[promtorture_sum_scrape_series_added]='sum(scrape_series_added{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"})'"${promtorture_label_join}"

# these will be run as instant, max() aggregate and avg() aggregate
declare -A queries_with_aggregates
queries_with_aggregates[go_memstats_heap_sys_bytes]='go_memstats_heap_sys_bytes{job="prometheus-k8s",container="prometheus"}'
queries_with_aggregates[go_memstats_alloc_bytes]='go_memstats_alloc_bytes{job="prometheus-k8s",container="prometheus"}'
queries_with_aggregates[go_memstats_sys_bytes]='go_memstats_sys_bytes{job="prometheus-k8s",container="prometheus"}'
queries_with_aggregates[go_memstats_heap_inuse_bytes]='go_memstats_heap_inuse_bytes{job="prometheus-k8s",container="prometheus"}'

for query in "${!queries[@]}"; do
  instant_query "${queries[$query]}" "metrics/$query"
done

for query in "${!queries_with_aggregates[@]}"; do
  instant_query "${queries_with_aggregates[$query]}" "metrics/$query"
  instant_query "max(${queries_with_aggregates[$query]})" "metrics/$query-max"
  instant_query "avg(${queries_with_aggregates[$query]})" "metrics/$query-avg"
done

# prom api info dumps
mkdir "$tmpdir/api"
for endpoint in target targetmeta tsdb-head walreplay config flags runtime; do
  ./scripts/promapi -m "${endpoint}" > "$tmpdir/api/${endpoint}.yaml"
done

# tsdb top-n stats, with more than the default 10 entries. This previously
# reused the stale ${endpoint} loop variable ("runtime"), clobbering that
# endpoint's dump instead of fetching the tsdb stats.
./scripts/promapi -m tsdb -l 200 > "$tmpdir/api/tsdb.yaml"

# tsdb snapshot - create, download, get size
mkdir "$tmpdir/tsdb-snapshot"
snap_name="$(./scripts/promapi -m snapshot | yq .data.name)"
echo 1>&2 "Created tsdb snapshot ${snap_name}"
./scripts/promcmd du -ks "/prometheus/snapshots/${snap_name}" > "$tmpdir/tsdb-snapshot/size-kb"
mkdir "$tmpdir/tsdb-snapshot/raw"
"${kubectl[@]}" cp "monitoring/prometheus-k8s-0:/prometheus/snapshots/${snap_name}/" "${tmpdir}/tsdb-snapshot/raw/"

# metrics dump (opt-in; default false so set -u doesn't trip on an unset var)
if [[ "${TSDB_DUMP:-false}" == "true" ]]; then
  # we're dumping the tsdb snapshot here so we don't have to deal with running promtool's own
  # dump and grabbing the file from the container FS.
  #
  # Work around its assumption that there will be WAL for a snapshot:
  ./scripts/promcmd mkdir -p "/prometheus/snapshots/${snap_name}/wal" "/prometheus/replay"
  # and dump to stdout
  compress="lz4"
  if ! type -p lz4 >&/dev/null; then
    echo 1>&2 "no lz4 command, falling back to gzip"
    compress="gzip"
  fi
  ./scripts/promcmd promtool tsdb dump --sandbox-dir-root="/prometheus/replay" "/prometheus/snapshots/${snap_name}" | ${compress} -c > "$tmpdir/tsdb-snapshot/tsdb-dump"."${compress}"
fi

# vim: set ts=2 sw=2 et ai :

testcases/promtorture/scripts/promapi

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
#!/bin/bash
22
#
3-
# This script requires a socks5 proxy
4-
# from scripts/socks5 to be running first
3+
# This script requires that it be able to resolve the kube endpoints
4+
# directly. Either use
5+
# http_proxy=socks5://localhost:1080 ./promapi [...]
6+
# with the proxy in ./scripts/socks5, or use a wireshark link like
7+
# that provided by `kubectl insider` to get direct DNS and connectivity.
58
#
69

710
set -e -u -o pipefail
811

912
function usage() {
10-
echo "Usage: meta -k target|targetmeta|tsdb|tsdb-head|walreplay|config|flags|runtime|snapshot"
13+
echo "Usage: promapi -m target|targetmeta|tsdb|tsdb-head|walreplay|config|flags|runtime|snapshot [-l nnnn]"
1114
echo
1215
echo "For tsdb:"
1316
echo " -l nnnn : number of entries in the top-n lists to return"
@@ -39,7 +42,7 @@ function apiQuery() {
3942
local endpoint
4043
endpoint=$1
4144
shift
42-
curl -sL --socks5-hostname localhost:1081 "${@}" 'http://prometheus-k8s.monitoring.svc.cluster.local:9090/'"${endpoint}" | yq --prettyPrint .
45+
curl -sL "${@}" 'http://prometheus-k8s.monitoring.svc.cluster.local:9090/'"${endpoint}" | yq --prettyPrint .
4346
}
4447

4548
case "${meta}" in

0 commit comments

Comments
 (0)