Skip to content

Commit ce8fe71

Browse files
authored
Add CPU stats into monitoring (#244)
* Add comment line * Store cpu and telemetry cpu values for default bench
1 parent 74b6d89 commit ce8fe71

File tree

6 files changed

+101
-9
lines changed

6 files changed

+101
-9
lines changed

.github/workflows/manual-compare-versions-benchmark.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ jobs:
167167
res_p99_time=$(compare "p99_time" "${{ needs.runBenchmarkForVersion1.outputs.p99_time }}" "${{ needs.runBenchmarkForVersion2.outputs.p99_time }}")
168168
res_vm_rss_memory_usage=$(compare "vm_rss_memory_usage" "${{ needs.runBenchmarkForVersion1.outputs.vm_rss_memory_usage }}" "${{ needs.runBenchmarkForVersion2.outputs.vm_rss_memory_usage }}")
169169
res_rss_anon_memory_usage=$(compare "rss_anon_memory_usage" "${{ needs.runBenchmarkForVersion1.outputs.rss_anon_memory_usage }}" "${{ needs.runBenchmarkForVersion2.outputs.rss_anon_memory_usage }}")
170+
res_cpu_usage=$(compare "cpu_usage" "${{ needs.runBenchmarkForVersion1.outputs.cpu }}" "${{ needs.runBenchmarkForVersion2.outputs.cpu }}")
170171
res_upload_time=$(compare "upload_time" "${{ needs.runBenchmarkForVersion1.outputs.upload_time }}" "${{ needs.runBenchmarkForVersion2.outputs.upload_time }}")
171172
res_indexing_time=$(compare "indexing_time" "${{ needs.runBenchmarkForVersion1.outputs.indexing_time }}" "${{ needs.runBenchmarkForVersion2.outputs.indexing_time }}")
172173
@@ -179,6 +180,7 @@ jobs:
179180
echo "| p95_time | ${{ needs.runBenchmarkForVersion1.outputs.p95_time }} | ${{ needs.runBenchmarkForVersion2.outputs.p95_time }} | ${res_p95_time} |" >> $GITHUB_STEP_SUMMARY
180181
echo "| p99_time | ${{ needs.runBenchmarkForVersion1.outputs.p99_time }} | ${{ needs.runBenchmarkForVersion2.outputs.p99_time }} | ${res_p99_time} |" >> $GITHUB_STEP_SUMMARY
181182
echo "| vm_rss_memory_usage | ${{ needs.runBenchmarkForVersion1.outputs.vm_rss_memory_usage }} | ${{ needs.runBenchmarkForVersion2.outputs.vm_rss_memory_usage }} | ${res_vm_rss_memory_usage} |" >> $GITHUB_STEP_SUMMARY
182-
echo "| rss_anon_memory_usage | ${{ needs.runBenchmarkForVersion1.outputs.rss_anon_memory_usage }} | ${{ needs.runBenchmarkForVersion2.outputs.rss_anon_memory_usage }} | ${res_rss_anon_memory_usage} |" >> $GITHUB_STEP_SUMMARY
183+
echo "| rss_anon_memory_usage | ${{ needs.runBenchmarkForVersion1.outputs.rss_anon_memory_usage }} | ${{ needs.runBenchmarkForVersion2.outputs.rss_anon_memory_usage }} | ${res_rss_anon_memory_usage} |" >> $GITHUB_STEP_SUMMARY
184+
echo "| cpu | ${{ needs.runBenchmarkForVersion1.outputs.cpu }} | ${{ needs.runBenchmarkForVersion2.outputs.cpu }} | ${res_cpu_usage} |" >> $GITHUB_STEP_SUMMARY
183185
echo "| upload_time | ${{ needs.runBenchmarkForVersion1.outputs.upload_time }} | ${{ needs.runBenchmarkForVersion2.outputs.upload_time }} | ${res_upload_time} |" >> $GITHUB_STEP_SUMMARY
184186
echo "| indexing_time | ${{ needs.runBenchmarkForVersion1.outputs.indexing_time }} | ${{ needs.runBenchmarkForVersion2.outputs.indexing_time }} | ${res_indexing_time} |" >> $GITHUB_STEP_SUMMARY

tools/qdrant_collect_cpu_usage.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Record the user-mode CPU time consumed by the remote qdrant process.
#
# Usage: qdrant_collect_cpu_usage.sh <start|end>
#   start  store the current utime counter in ./results/cpu/utime-<date>.txt
#   end    read the newest results/cpu/utime-*.txt, append the current counter
#          to it and write the elapsed CPU seconds to
#          ./results/cpu/cpu-usage-<date>.txt
#
# Environment:
#   CLOUD_NAME       sub-directory holding get_public_ip.sh (default: hetzner)
#   SERVER_USERNAME  ssh login user (default: root)
#   SERVER_NAME      name passed to get_public_ip.sh (default: benchmark-server-1)
#   CLK_TCK          clock ticks per second used to convert utime to seconds
#                    (default: 100; check `getconf CLK_TCK` on the server)
#
# NOTE(review): only field 14 (utime) of /proc/<pid>/stat is sampled, so
# kernel-mode time (stime) is not part of the reported value — confirm that
# this is intended.

PS4='ts=$(date "+%Y-%m-%dT%H:%M:%SZ") level=DEBUG line=$LINENO file=$BASH_SOURCE '
set -euo pipefail

# Examples: start or end
MODE=${1:-}

# Reject a missing or unknown mode before doing any remote work.
case "$MODE" in
  start | end) ;;
  *)
    echo "Unknown mode: $MODE" >&2
    exit 1
    ;;
esac

CLOUD_NAME=${CLOUD_NAME:-"hetzner"}
SERVER_USERNAME=${SERVER_USERNAME:-"root"}
CLK_TCK=${CLK_TCK:-100}

if [[ ! "$CLK_TCK" =~ ^[1-9][0-9]*$ ]]; then
  echo "CLK_TCK must be a positive integer, got '${CLK_TCK}'" >&2
  exit 1
fi

SCRIPT=$(realpath "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")

BENCH_SERVER_NAME=${SERVER_NAME:-"benchmark-server-1"}

#######################################
# Print the most recently modified results/cpu/utime-*.txt file.
# Outputs: path of the newest file on stdout
# Returns: 1 when no such file exists
#######################################
newest_utime_file() {
  local candidate newest=""
  for candidate in results/cpu/utime-*.txt; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  [[ -n "$newest" ]] || return 1
  printf '%s\n' "$newest"
}

#######################################
# Abort unless the given value is a plain non-negative integer.
# Arguments: $1 - label used in the error message, $2 - value to check
#######################################
require_integer() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "Unexpected $1 value: '$2'" >&2
    exit 1
  fi
}

IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_public_ip.sh" "$BENCH_SERVER_NAME")

UTIME=$(ssh -tt -o ServerAliveInterval=10 -o ServerAliveCountMax=10 "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "cat /proc/\$(pidof qdrant)/stat | awk '{print \$14}'")
# Clean up any whitespace characters
UTIME=$(echo "$UTIME" | tr -d '[:space:]')
# With -tt remote error text shares the captured stream, so make sure we
# really got a counter before storing it or feeding it to bc.
require_integer "utime" "$UTIME"

CURRENT_DATE=$(date +%Y-%m-%d-%H-%M-%S)

mkdir -p results/cpu

if [[ "$MODE" == "end" ]]; then
  echo "Calculate CPU usage (seconds) over period of time"
  if ! UTIME_FILE=$(newest_utime_file); then
    echo "No results/cpu/utime-*.txt file found; run with 'start' first" >&2
    exit 1
  fi
  # Only the first line is the start counter: earlier "end" runs append
  # their own sample to the same file.
  UTIME_START=$(head -n 1 "$UTIME_FILE" | tr -d '[:space:]')
  require_integer "utime start" "$UTIME_START"
  echo "$UTIME" >> "${UTIME_FILE}"
  CPU=$(echo "scale=2; ($UTIME - $UTIME_START) / $CLK_TCK" | bc)
  echo "$CPU" > "./results/cpu/cpu-usage-${CURRENT_DATE}.txt"
else
  echo "Store utime start value in ./results/cpu/utime-${CURRENT_DATE}.txt"
  echo "$UTIME" > "./results/cpu/utime-${CURRENT_DATE}.txt"
fi

tools/run_ci.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ trap 'handle_term' TERM
2424

2525
# Script, that runs benchmark within the GitHub Actions CI environment
2626

27-
BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}
27+
# Possible values for BENCHMARK_STRATEGY: default, tenants, parallel and collection-reload
28+
export BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}
2829

2930
SCRIPT=$(realpath "$0")
3031
SCRIPT_PATH=$(dirname "$SCRIPT")
@@ -37,9 +38,14 @@ if [[ "$BENCHMARK_STRATEGY" == "collection-reload" ]]; then
3738
export TELEMETRY_API_RESPONSE_FILE=$(ls -t results/telemetry-api-*.json | head -n 1)
3839
else
3940
# any other strategies are considered to have search & upload results
41+
export TELEMETRY_API_RESPONSE_FILE=$(ls -t results/telemetry-api-*.json | head -n 1)
4042
export SEARCH_RESULTS_FILE=$(find results/ -maxdepth 1 -type f -name '*-search-*.json' -printf '%T@ %p\n' | sort -nr | head -n 1 | cut -d' ' -f2-)
4143
export UPLOAD_RESULTS_FILE=$(find results/ -maxdepth 1 -type f -name '*-upload-*.json' -printf '%T@ %p\n' | sort -nr | head -n 1 | cut -d' ' -f2-)
4244

45+
if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
46+
export CPU_USAGE_FILE=$(ls -t results/cpu/cpu-usage-*.txt | head -n 1)
47+
fi
48+
4349
if [[ "$BENCHMARK_STRATEGY" == "parallel" ]]; then
4450
export PARALLEL_UPLOAD_RESULTS_FILE=$(ls -t results/parallel/*-upload-*.json | head -n 1)
4551
export PARALLEL_SEARCH_RESULTS_FILE=$(ls -t results/parallel/*-search-*.json | head -n 1)

tools/run_remote_benchmark.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,12 @@ case "$BENCHMARK_STRATEGY" in
4444

4545
bash -x "${SCRIPT_PATH}/run_server_container.sh" "$SERVER_CONTAINER_NAME"
4646

47+
bash -x "${SCRIPT_PATH}/qdrant_collect_cpu_usage.sh" "start"
48+
4749
bash -x "${SCRIPT_PATH}/run_client_script.sh"
4850

51+
bash -x "${SCRIPT_PATH}/qdrant_collect_cpu_usage.sh" "end"
52+
4953
bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
5054
;;
5155
"tenants")

tools/upload_parallel_results_postgres.sh

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
# p99_time real,
2020
# search_time real,
2121
# no_upsert_search_time real,
22+
# cpu real,
23+
# cpu_telemetry real
2224
# );
2325

2426
PARALLEL_SEARCH_RESULTS_FILE=${PARALLEL_SEARCH_RESULTS_FILE:-""}
@@ -63,6 +65,13 @@ if [[ -z "$ROOT_API_RESPONSE_FILE" ]]; then
6365
exit 1
6466
fi
6567

68+
if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
69+
if [[ -z "$CPU_USAGE_FILE" ]]; then
70+
echo "CPU_USAGE_FILE is not set"
71+
exit 1
72+
fi
73+
fi
74+
6675
RPS=NULL
6776
MEAN_PRECISIONS=NULL
6877
P95_TIME=NULL
@@ -71,6 +80,8 @@ UPLOAD_TIME=NULL
7180
INDEXING_TIME=NULL
7281
SEARCH_TIME=NULL
7382
NO_UPSERT_SEARCH_TIME=NULL
83+
CPU=NULL
84+
CPU_TELEMETRY=NULL
7485

7586
RPS=$(jq -r '.results.rps' "$PARALLEL_SEARCH_RESULTS_FILE")
7687
MEAN_PRECISIONS=$(jq -r '.results.mean_precisions' "$PARALLEL_SEARCH_RESULTS_FILE")
@@ -82,14 +93,20 @@ NO_UPSERT_SEARCH_TIME=$(jq -r '.results.total_time' "$SEARCH_RESULT_FILE")
8293
UPLOAD_TIME=$(jq -r '.results.upload_time' "$PARALLEL_UPLOAD_RESULTS_FILE")
8394
INDEXING_TIME=$(jq -r '.results.total_time' "$PARALLEL_UPLOAD_RESULTS_FILE")
8495

96+
if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
97+
# Only this strategy produces cpu usage results files
98+
CPU=$(cat "$CPU_USAGE_FILE" | tr -d '[:space:]')
99+
fi
100+
CPU_TELEMETRY=$(jq -r '.result.hardware.collection_data.benchmark.cpu' "$TELEMETRY_API_RESPONSE_FILE")
101+
85102
QDRANT_COMMIT=$(jq -r '.commit' "$ROOT_API_RESPONSE_FILE")
86103

87104
MEASURE_TIMESTAMP=${MEASURE_TIMESTAMP:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}
88105

89106

90107
docker run --name "vector-db" --rm jbergknoff/postgresql-client "postgresql://qdrant:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/postgres" -c "
91-
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, search_time, no_upsert_search_time)
92-
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${SEARCH_TIME}, ${NO_UPSERT_SEARCH_TIME});
108+
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, search_time, no_upsert_search_time, cpu_telemetry, cpu)
109+
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${SEARCH_TIME}, ${NO_UPSERT_SEARCH_TIME}, ${CPU_TELEMETRY}, ${CPU});
93110
"
94111

95112
if [[ "$IS_CI_RUN" == "true" ]]; then
@@ -103,4 +120,7 @@ if [[ "$IS_CI_RUN" == "true" ]]; then
103120

104121
echo "upload_time=${UPLOAD_TIME}" >> "$GITHUB_OUTPUT"
105122
echo "indexing_time=${INDEXING_TIME}" >> "$GITHUB_OUTPUT"
123+
124+
echo "cpu_telemetry=${CPU_TELEMETRY}" >> "$GITHUB_OUTPUT"
125+
echo "cpu=${CPU}" >> "$GITHUB_OUTPUT"
106126
fi

tools/upload_results_postgres.sh

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
# p95_time real,
1919
# p99_time real,
2020
# vm_rss_mem real,
21-
# rss_anon_mem real
22-
# collection_load_time_ms real
21+
# rss_anon_mem real,
22+
# collection_load_time_ms real,
23+
# cpu real,
24+
# cpu_telemetry real
2325
# );
2426

2527
SEARCH_RESULTS_FILE=${SEARCH_RESULTS_FILE:-""}
@@ -69,15 +71,25 @@ if [[ -z "$ROOT_API_RESPONSE_FILE" ]]; then
6971
exit 1
7072
fi
7173

74+
if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
75+
if [[ -z "$CPU_USAGE_FILE" ]]; then
76+
echo "CPU_USAGE_FILE is not set"
77+
exit 1
78+
fi
79+
fi
80+
7281
COLLECTION_LOAD_TIME=NULL
7382
RPS=NULL
7483
MEAN_PRECISIONS=NULL
7584
P95_TIME=NULL
7685
P99_TIME=NULL
7786
UPLOAD_TIME=NULL
7887
INDEXING_TIME=NULL
88+
CPU=NULL
89+
CPU_TELEMETRY=NULL
7990

8091
if [[ "$BENCHMARK_STRATEGY" == "collection-reload" ]]; then
92+
# this strategy does not produce search & upload results files
8193
echo "BENCHMARK_STRATEGY is $BENCHMARK_STRATEGY, upload telemetry"
8294
COLLECTION_LOAD_TIME=$(jq -r '.result.collections.collections[] | select(.id == "benchmark") | .init_time_ms' "$TELEMETRY_API_RESPONSE_FILE")
8395
else
@@ -94,14 +106,19 @@ fi
94106
VM_RSS_MEMORY_USAGE=$(cat "$VM_RSS_MEMORY_USAGE_FILE" | tr -d '[:space:]')
95107
RSS_ANON_MEMORY_USAGE=$(cat "$RSS_ANON_MEMORY_USAGE_FILE" | tr -d '[:space:]')
96108

109+
if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
110+
# Only this strategy produces cpu usage results files
111+
CPU=$(cat "$CPU_USAGE_FILE" | tr -d '[:space:]')
112+
fi
113+
CPU_TELEMETRY=$(jq -r '.result.hardware.collection_data.benchmark.cpu' "$TELEMETRY_API_RESPONSE_FILE")
114+
97115
QDRANT_COMMIT=$(jq -r '.commit' "$ROOT_API_RESPONSE_FILE")
98116

99117
MEASURE_TIMESTAMP=${MEASURE_TIMESTAMP:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}
100118

101-
102119
docker run --name "vector-db" --rm jbergknoff/postgresql-client "postgresql://qdrant:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/postgres" -c "
103-
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, vm_rss_mem, rss_anon_mem, collection_load_time_ms)
104-
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${VM_RSS_MEMORY_USAGE}, ${RSS_ANON_MEMORY_USAGE}, ${COLLECTION_LOAD_TIME});
120+
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, vm_rss_mem, rss_anon_mem, collection_load_time_ms, cpu_telemetry, cpu)
121+
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${VM_RSS_MEMORY_USAGE}, ${RSS_ANON_MEMORY_USAGE}, ${COLLECTION_LOAD_TIME}, ${CPU_TELEMETRY}, ${CPU});
105122
"
106123

107124
if [[ "$IS_CI_RUN" == "true" ]]; then
@@ -117,4 +134,7 @@ if [[ "$IS_CI_RUN" == "true" ]]; then
117134

118135
echo "upload_time=${UPLOAD_TIME}" >> "$GITHUB_OUTPUT"
119136
echo "indexing_time=${INDEXING_TIME}" >> "$GITHUB_OUTPUT"
137+
138+
echo "cpu_telemetry=${CPU_TELEMETRY}" >> "$GITHUB_OUTPUT"
139+
echo "cpu=${CPU}" >> "$GITHUB_OUTPUT"
120140
fi

0 commit comments

Comments
 (0)