Skip to content

Commit b42bda8

Browse files
authored
Allow building vector_db image in manual workflow (#268)
* Add manual workflows to concurrency group * Allow to build image
1 parent 73c3b6c commit b42bda8

File tree

4 files changed

+64
-6
lines changed

4 files changed

+64
-6
lines changed

.github/workflows/manual-benchmark.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,47 @@ on:
1616
type: boolean
1717
description: "Enable all feature flags (false by default)"
1818
default: false
19+
build_vector_db_image:
20+
required: false
21+
description: "Build vector DB image from source branch and use it (false by default)"
22+
default: "false"
1923

24+
# Restrict to only running workflows of this group one at a time.
25+
# Any new runs will be queued until the previous run is complete.
26+
# Any existing pending runs will be cancelled and replaced with current run.
27+
concurrency:
28+
group: continuous-benchmark
2029

2130
jobs:
2231
runManualBenchmark:
2332
name: manual benchmark - ${{ inputs.qdrant_version }} - ${{ inputs.dataset }}
2433
runs-on: ubuntu-latest
34+
permissions:
35+
contents: read
36+
packages: write
2537
steps:
2638
- uses: actions/checkout@v3
2739
- uses: webfactory/ssh-agent@v0.8.0
2840
with:
2941
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
42+
- name: Set up Docker Buildx
43+
if: ${{ inputs.build_vector_db_image == 'true' }}
44+
uses: docker/setup-buildx-action@v3
45+
- name: Login to ghcr.io
46+
if: ${{ inputs.build_vector_db_image == 'true' }}
47+
uses: docker/login-action@v3
48+
with:
49+
registry: ghcr.io
50+
username: ${{ github.actor }}
51+
password: ${{ secrets.GITHUB_TOKEN }}
52+
- name: Build Vector DB image
53+
if: ${{ inputs.build_vector_db_image == 'true' }}
54+
uses: docker/build-push-action@v6
55+
with:
56+
context: .
57+
push: true
58+
tags: ghcr.io/${{ github.actor }}/vector-db-benchmark:${{ github.ref_name }}
59+
provenance: false
3060
- name: Benches
3161
run: |
3262
export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
@@ -37,5 +67,9 @@ jobs:
3767
export ENGINE_NAME=${{ inputs.engine_config }}
3868
export POSTGRES_TABLE=benchmark_manual
3969
export QDRANT__FEATURE_FLAGS__ALL=${{ inputs.feature_flags_all }}
70+
if [ "${{ inputs.build_vector_db_image }}" = "true" ]; then
71+
export VECTOR_DB_BENCHMARK_IMAGE=ghcr.io/${{ github.actor }}/vector-db-benchmark:${{ github.ref_name }}
72+
export GHCR_PASSWORD=${{ secrets.GITHUB_TOKEN }}
73+
fi
4074
bash -x tools/setup_ci.sh
4175
bash -x tools/run_ci.sh

.github/workflows/manual-compare-versions-benchmark.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ on:
2828
description: "Enable all feature flags (false by default), version 2"
2929
default: false
3030

31+
# Restrict to only running workflows of this concurrency group one at a time.
32+
# Any new runs will be queued until the previous run is complete.
33+
# Any existing pending runs will be cancelled and replaced with current run.
34+
concurrency:
35+
group: continuous-benchmark
36+
3137
jobs:
3238
prepareImage1:
3339
name: Prepare image ${{ inputs.qdrant_version_1 }}

tools/run_client_script.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ FETCH_ALL_RESULTS=${FETCH_ALL_RESULTS:-"false"}
2727

2828
PRIVATE_IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_private_ip.sh" "$BENCH_SERVER_NAME")
2929

30+
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE:-"qdrant/vector-db-benchmark:latest"}
31+
GHCR_PASSWORD=${GHCR_PASSWORD:-""}
32+
3033
if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
3134
scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment_snapshot.sh"
3235
scp "${SCRIPT_PATH}/../datasets/datasets.json" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/datasets.json"
@@ -36,6 +39,8 @@ if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
3639
PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} \
3740
EXPERIMENT_MODE=${EXPERIMENT_MODE} \
3841
SNAPSHOT_URL=${SNAPSHOT_URL} \
42+
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE} \
43+
GHCR_PASSWORD=${GHCR_PASSWORD} \
3944
bash ~/run_experiment_snapshot.sh"
4045

4146
ssh -tt -o ServerAliveInterval=120 -o ServerAliveCountMax=10 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
@@ -48,6 +53,8 @@ else
4853
DATASETS=${DATASETS} \
4954
PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} \
5055
EXPERIMENT_MODE=${EXPERIMENT_MODE} \
56+
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE} \
57+
GHCR_PASSWORD=${GHCR_PASSWORD} \
5158
bash ~/run_experiment.sh"
5259

5360
ssh -tt -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"

tools/run_experiment.sh

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@
33
PS4='ts=$(date "+%Y-%m-%dT%H:%M:%SZ") level=DEBUG line=$LINENO file=$BASH_SOURCE '
44
set -euo pipefail
55

6+
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE:-"qdrant/vector-db-benchmark:latest"}
7+
GHCR_PASSWORD=${GHCR_PASSWORD:-""}
8+
9+
if [[ -n "${GHCR_PASSWORD}" ]] || [[ "${VECTOR_DB_BENCHMARK_IMAGE}" == ghcr.io/* ]]; then
10+
if [[ -z "${GHCR_PASSWORD}" ]]; then
11+
echo "GHCR_PASSWORD is required to pull images from ghcr.io"
12+
exit 1
13+
fi
14+
echo "${GHCR_PASSWORD}" | docker login ghcr.io -u qdrant --password-stdin
15+
fi
16+
617
ENGINE_NAME=${ENGINE_NAME:-"qdrant-continuous-benchmark"}
718

819
DATASETS=${DATASETS:-""}
@@ -42,7 +53,7 @@ if [[ "$EXPERIMENT_MODE" != "snapshot" ]]; then
4253
docker container rm -f ci-benchmark-upload || true
4354
docker container rm -f ci-benchmark-search || true
4455

45-
docker rmi --force qdrant/vector-db-benchmark:latest || true
56+
docker rmi --force "${VECTOR_DB_BENCHMARK_IMAGE}" || true
4657
fi
4758

4859
echo "Ensure datasets volume exists and contains latest datasets.json"
@@ -62,7 +73,7 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "upload" ]]; t
6273
--name ci-benchmark-upload \
6374
-v "$HOME/results:/code/results" \
6475
-v "ci-datasets:/code/datasets" \
65-
qdrant/vector-db-benchmark:latest \
76+
"${VECTOR_DB_BENCHMARK_IMAGE}" \
6677
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-search
6778
fi
6879

@@ -81,23 +92,23 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "search" ]]; t
8192
--name ci-benchmark-search \
8293
-v "$HOME/results:/code/results" \
8394
-v "ci-datasets:/code/datasets" \
84-
qdrant/vector-db-benchmark:latest \
95+
"${VECTOR_DB_BENCHMARK_IMAGE}" \
8596
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload
8697
fi
8798

8899

89100
if [[ "$EXPERIMENT_MODE" == "parallel" ]]; then
90101
echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"
91102

92-
docker pull qdrant/vector-db-benchmark:latest
103+
docker pull "${VECTOR_DB_BENCHMARK_IMAGE}"
93104

94105
echo "Starting ci-benchmark-upload container"
95106
docker run \
96107
--rm \
97108
--name ci-benchmark-upload \
98109
-v "$HOME/results/parallel:/code/results" \
99110
-v "ci-datasets:/code/datasets" \
100-
qdrant/vector-db-benchmark:latest \
111+
"${VECTOR_DB_BENCHMARK_IMAGE}" \
101112
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-search --skip-configure &
102113
UPLOAD_PID=$!
103114

@@ -107,7 +118,7 @@ if [[ "$EXPERIMENT_MODE" == "parallel" ]]; then
107118
--name ci-benchmark-search \
108119
-v "$HOME/results/parallel:/code/results" \
109120
-v "ci-datasets:/code/datasets" \
110-
qdrant/vector-db-benchmark:latest \
121+
"${VECTOR_DB_BENCHMARK_IMAGE}" \
111122
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload &
112123
SEARCH_PID=$!
113124

0 commit comments

Comments
 (0)