|
#!/bin/bash
#
# Grab some prometheus metrics + a tsdb snapshot and a metrics dump from
# the prometheus server running in the kind cluster.
#
# This should be in a real language not bash, but meh.
#

set -e -u -o pipefail -x

# Provides (at least) the ${kubectl[@]} command array used throughout.
source scripts/config

# One output dir per run, timestamped so runs never collide.
tmpdir="promtorture-metrics-$(date -Isec)"

echo 1>&2 "Dumping metrics to $tmpdir"

# BUG FIX: the output directory was never created, so the redirect below
# (and every later mkdir of a subdirectory) failed on a fresh run.
mkdir -p "$tmpdir"

# arguments passed to this script just get dumped to the tempdir, so it's a
# convenient way to record the promtorture invocation
echo "$@" > "$tmpdir/args"
# SOCKS5 proxy into the cluster so in-cluster service DNS names resolve
# from this script (curl honours http_proxy below).
socks5_port=31121
socks5_host=localhost
# BUG FIX: the port was hard-coded (duplicating socks5_port) and plain
# `kubectl` was used where the rest of this script uses the ${kubectl[@]}
# command array sourced from scripts/config.
"${kubectl[@]}" socks5-proxy -N socks5-proxy -p "${socks5_port}" &
socks5_pid=$!
# Because socks5-proxy is written in bash this kills the script but not the
# underlying kubectl port-forward process. We should fix this in the
# socks5-proxy script. For now we'll find the child proc and kill it.
trap 'kill $(pgrep -P ${socks5_pid}); kill ${socks5_pid}' EXIT
export http_proxy="socks5://${socks5_host}:${socks5_port}"
while : ; do
  # wait for proxy to be ready by checking prometheus is reachable;
  # --max-time keeps a half-open proxy from hanging the probe forever
  if curl -sL --max-time 5 http://prometheus-k8s.monitoring.svc.cluster.local:9090 > /dev/null; then
    break
  fi
  sleep 1
done
| 37 | + |
| 38 | +promtorture_pod="$("${kubectl[@]}" get pod -n default -l app=promtorture -o jsonpath='{.items[0].metadata.name}')" |
| 39 | + |
| 40 | +# metrics queries |
| 41 | +mkdir "$tmpdir/metrics" |
| 42 | + |
# Run an instant PromQL query with promtool (via scripts/promcmd, inside the
# prometheus container) and record the query text plus output in $tmpdir/$2.
instant_query_promtool() {
  local query=$1
  local outfile="$tmpdir/$2"
  echo '# query: ' "$query" > "$outfile"
  ./scripts/promcmd promtool query instant http://localhost:9090 "$query" >> "$outfile"
}
| 47 | + |
# Run an instant PromQL query against the HTTP API (through the SOCKS proxy)
# and record the query text plus pretty-printed .data YAML in $tmpdir/$2.
instant_query_curl() {
  local query=$1
  local outfile="$tmpdir/$2"
  local curl_args=(-sL -G --data-urlencode "query=$query")
  echo '# query: ' "$query" > "$outfile"
  curl "${curl_args[@]}" "http://prometheus-k8s.monitoring.svc.cluster.local:9090/api/v1/query" \
    | yq --prettyPrint .data >> "$outfile"
}
| 52 | + |
# Capture one query both ways: YAML via the HTTP API and text via promtool.
instant_query() {
  local query=$1
  local outbase=$2
  instant_query_curl "$query" "${outbase}.yaml"
  instant_query_promtool "$query" "${outbase}.prom"
}
| 57 | + |
| 58 | + |
# Joined onto promtorture scrape metrics to carry the torture-run parameters
# (gauge count, label count, target count) into the result labels.
promtorture_label_join=' * on (job,container,pod) group_left(gauge_metrics,info_labels,targets) (promtorture_info{job="monitoring/promtorture",container="promtorture"})'

declare -A queries
queries[prometheus_tsdb_head_series]='increase(prometheus_tsdb_head_series{job="prometheus-k8s",container="prometheus"}[5m])'
queries[prometheus_tsdb_head_chunks]='prometheus_tsdb_head_chunks{job="prometheus-k8s",container="prometheus"}'
queries[delta_prometheus_tsdb_head_chunks]='delta(prometheus_tsdb_head_chunks{job="prometheus-k8s",container="prometheus"}[5m])'
queries[prometheus_tsdb_head_chunks_created_total]='prometheus_tsdb_head_chunks_created_total{job="prometheus-k8s",container="prometheus"}'
queries[prometheus_tsdb_head_chunks_storage_size_bytes]='prometheus_tsdb_head_chunks_storage_size_bytes{job="prometheus-k8s",container="prometheus"}'
queries[prometheus_tsdb_storage_blocks_bytes]='prometheus_tsdb_storage_blocks_bytes{job="prometheus-k8s",container="prometheus"}'
queries[process_resident_memory_bytes]='process_resident_memory_bytes{job="prometheus-k8s",container="prometheus"}'
queries[go_gc_gomemlimit_bytes]='go_gc_gomemlimit_bytes{job="prometheus-k8s",container="prometheus"}'
queries[prometheus_tsdb_symbol_table_size_bytes]='prometheus_tsdb_symbol_table_size_bytes{job="prometheus-k8s",container="prometheus"}'
queries[container_memory_working_set_bytes]='container_memory_working_set_bytes{container="prometheus",pod="prometheus-k8s-0"}'
# NOTE(review): the key says head_series but the query is over
# prometheus_target_metadata_cache_bytes — one of them looks wrong; left
# as-is pending confirmation of which was intended.
queries[sum_prometheus_tsdb_head_series]='sum by () (prometheus_target_metadata_cache_bytes{job="prometheus-k8s",container="prometheus"})'
queries[promtorture_max_scrape_duration_seconds]='max(scrape_duration_seconds{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"})'"${promtorture_label_join}"
queries[promtorture_avg_scrape_duration_seconds]='avg(scrape_duration_seconds{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"})'"${promtorture_label_join}"
queries[promtorture_scrape_samples_scraped]='scrape_samples_scraped{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"}'"${promtorture_label_join}"
queries[promtorture_scrape_body_size_bytes]='scrape_body_size_bytes{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"}'"${promtorture_label_join}"
queries[promtorture_sum_scrape_series_added]='sum(scrape_series_added{job="monitoring/promtorture",container="promtorture",pod="'"${promtorture_pod}"'"})'"${promtorture_label_join}"

# these will be run as instant, max() aggregate and avg() aggregate
# (duplicate go_memstats_heap_sys_bytes assignment removed — it was set
# twice with the identical value)
declare -A queries_with_aggregates
queries_with_aggregates[go_memstats_heap_sys_bytes]='go_memstats_heap_sys_bytes{job="prometheus-k8s",container="prometheus"}'
queries_with_aggregates[go_memstats_alloc_bytes]='go_memstats_alloc_bytes{job="prometheus-k8s",container="prometheus"}'
queries_with_aggregates[go_memstats_sys_bytes]='go_memstats_sys_bytes{job="prometheus-k8s",container="prometheus"}'
queries_with_aggregates[go_memstats_heap_inuse_bytes]='go_memstats_heap_inuse_bytes{job="prometheus-k8s",container="prometheus"}'

for query in "${!queries[@]}"; do
  instant_query "${queries[$query]}" "metrics/$query"
done

for query in "${!queries_with_aggregates[@]}"; do
  instant_query "${queries_with_aggregates[$query]}" "metrics/$query"
  instant_query "max(${queries_with_aggregates[$query]})" "metrics/$query-max"
  instant_query "avg(${queries_with_aggregates[$query]})" "metrics/$query-avg"
done
| 96 | + |
# prom api info dumps
mkdir "$tmpdir/api"
for endpoint in target targetmeta tsdb-head walreplay config flags runtime; do
  ./scripts/promapi -m "${endpoint}" > "$tmpdir/api/${endpoint}.yaml"
done

# collect more than the default 10 labels
# BUG FIX: this previously reused the stale ${endpoint} loop variable (which
# holds "runtime" after the loop), so it silently re-dumped the runtime
# endpoint over runtime.yaml. The -l label limit corresponds to the tsdb
# head-stats endpoint (Prometheus caps label stats at top-10 by default),
# so request that explicitly and write to a distinct file.
./scripts/promapi -m tsdb-head -l 200 > "$tmpdir/api/tsdb-head-200-labels.yaml"
| 105 | + |
# tsdb snapshot - create, download, get size
mkdir "$tmpdir/tsdb-snapshot"
# Ask prometheus (admin API, via scripts/promapi) to cut a snapshot; the
# response's .data.name is the snapshot directory name on the server.
snap_name="$(./scripts/promapi -m snapshot | yq .data.name)"
echo 1>&2 "Created tsdb snapshot ${snap_name}"
# Record the snapshot's on-disk size (KiB) as measured inside the container.
./scripts/promcmd du -ks "/prometheus/snapshots/${snap_name}" > "$tmpdir/tsdb-snapshot/size-kb"
mkdir "$tmpdir/tsdb-snapshot/raw"
# Copy the raw snapshot out of the prometheus pod for local inspection.
"${kubectl[@]}" cp "monitoring/prometheus-k8s-0:/prometheus/snapshots/${snap_name}/" "${tmpdir}/tsdb-snapshot/raw/"
| 113 | + |
# metrics dump (opt-in: only runs when TSDB_DUMP=true)
# BUG FIX: under `set -u` an unset TSDB_DUMP aborted the whole script here;
# default it to "false" so the dump is simply skipped.
if [[ "${TSDB_DUMP:-false}" == "true" ]]; then
  # we're dumping the tsdb snapshot here so we don't have to deal with running promtool's own
  # dump and grabbing the file from the container FS.
  #
  # Work around its assumption that there will be WAL for a snapshot:
  ./scripts/promcmd mkdir -p "/prometheus/snapshots/${snap_name}/wal" "/prometheus/replay"
  # and dump to stdout, compressing locally; prefer lz4 for speed
  compress="lz4"
  if ! command -v lz4 >/dev/null 2>&1; then
    echo 1>&2 "no lz4 command, falling back to gzip"
    compress="gzip"
  fi
  ./scripts/promcmd promtool tsdb dump --sandbox-dir-root="/prometheus/replay" "/prometheus/snapshots/${snap_name}" | ${compress} -c > "$tmpdir/tsdb-snapshot/tsdb-dump"."${compress}"
fi
| 129 | + |
| 130 | +# vim: set ts=2 sw=2 et ai : |
0 commit comments