Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/manual-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,47 @@ on:
type: boolean
description: "Enable all feature flags (false by default)"
default: false
build_vector_db_image:
required: false
description: "Build vector DB image from source branch and use it (false by default)"
default: "false"

# Restrict workflows in this concurrency group to one run at a time.
# A new run is queued until the in-progress run completes.
# A run that is already pending is cancelled and replaced by the newest run.
concurrency:
group: continuous-benchmark

jobs:
runManualBenchmark:
name: manual benchmark - ${{ inputs.qdrant_version }} - ${{ inputs.dataset }}
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v3
- uses: webfactory/[email protected]
with:
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
- name: Set up Docker Buildx
if: ${{ inputs.build_vector_db_image == 'true' }}
uses: docker/setup-buildx-action@v3
- name: Login to ghcr.io
if: ${{ inputs.build_vector_db_image == 'true' }}
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build Vector DB image
if: ${{ inputs.build_vector_db_image == 'true' }}
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: ghcr.io/${{ github.actor }}/vector-db-benchmark:${{ github.ref_name }}
provenance: false
- name: Benches
run: |
export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
Expand All @@ -37,5 +67,9 @@ jobs:
export ENGINE_NAME=${{ inputs.engine_config }}
export POSTGRES_TABLE=benchmark_manual
export QDRANT__FEATURE_FLAGS__ALL=${{ inputs.feature_flags_all }}
if [ "${{ inputs.build_vector_db_image }}" = "true" ]; then
export VECTOR_DB_BENCHMARK_IMAGE=ghcr.io/${{ github.actor }}/vector-db-benchmark:${{ github.ref_name }}
export GHCR_PASSWORD=${{ secrets.GITHUB_TOKEN }}
fi
bash -x tools/setup_ci.sh
bash -x tools/run_ci.sh
6 changes: 6 additions & 0 deletions .github/workflows/manual-compare-versions-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ on:
description: "Enable all feature flags (false by default), version 2"
default: false

# Restrict workflows in the `continuous-benchmark` concurrency group to one run at a time.
# A new run is queued until the in-progress run completes.
# A run that is already pending is cancelled and replaced by the newest run.
concurrency:
group: continuous-benchmark

jobs:
prepareImage1:
name: Prepare image ${{ inputs.qdrant_version_1 }}
Expand Down
7 changes: 7 additions & 0 deletions tools/run_client_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ FETCH_ALL_RESULTS=${FETCH_ALL_RESULTS:-"false"}

PRIVATE_IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_private_ip.sh" "$BENCH_SERVER_NAME")

# Benchmark image forwarded to the remote experiment script; defaults to
# qdrant/vector-db-benchmark:latest when the caller does not set one.
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE:-"qdrant/vector-db-benchmark:latest"}
# Registry password forwarded to the remote experiment script; empty when unset.
GHCR_PASSWORD=${GHCR_PASSWORD:-""}

if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment_snapshot.sh"
scp "${SCRIPT_PATH}/../datasets/datasets.json" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/datasets.json"
Expand All @@ -36,6 +39,8 @@ if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} \
EXPERIMENT_MODE=${EXPERIMENT_MODE} \
SNAPSHOT_URL=${SNAPSHOT_URL} \
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE} \
GHCR_PASSWORD=${GHCR_PASSWORD} \
bash ~/run_experiment_snapshot.sh"

ssh -tt -o ServerAliveInterval=120 -o ServerAliveCountMax=10 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
Expand All @@ -48,6 +53,8 @@ else
DATASETS=${DATASETS} \
PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} \
EXPERIMENT_MODE=${EXPERIMENT_MODE} \
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE} \
GHCR_PASSWORD=${GHCR_PASSWORD} \
bash ~/run_experiment.sh"

ssh -tt -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
Expand Down
23 changes: 17 additions & 6 deletions tools/run_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
PS4='ts=$(date "+%Y-%m-%dT%H:%M:%SZ") level=DEBUG line=$LINENO file=$BASH_SOURCE '
set -euo pipefail

# Benchmark image used by this script; defaults to
# qdrant/vector-db-benchmark:latest when the caller does not set one.
VECTOR_DB_BENCHMARK_IMAGE=${VECTOR_DB_BENCHMARK_IMAGE:-"qdrant/vector-db-benchmark:latest"}
# Password for ghcr.io; an empty value means no login is attempted.
GHCR_PASSWORD=${GHCR_PASSWORD:-""}

# This script requires a password for any image hosted on ghcr.io. Fail fast,
# and report the problem on stderr so it is not mixed into regular output.
if [[ -z "${GHCR_PASSWORD}" && "${VECTOR_DB_BENCHMARK_IMAGE}" == ghcr.io/* ]]; then
  echo "GHCR_PASSWORD is required to pull images from ghcr.io" >&2
  exit 1
fi

# Log in whenever a password was supplied, regardless of which image is used.
# printf '%s' passes the secret through verbatim; echo could treat a value
# such as "-n" or "-e" as an option.
# NOTE(review): if xtrace is active when this runs, the password appears in
# the trace output -- confirm how this script is invoked.
if [[ -n "${GHCR_PASSWORD}" ]]; then
  printf '%s' "${GHCR_PASSWORD}" | docker login ghcr.io -u qdrant --password-stdin
fi

ENGINE_NAME=${ENGINE_NAME:-"qdrant-continuous-benchmark"}

DATASETS=${DATASETS:-""}
Expand Down Expand Up @@ -42,7 +53,7 @@ if [[ "$EXPERIMENT_MODE" != "snapshot" ]]; then
docker container rm -f ci-benchmark-upload || true
docker container rm -f ci-benchmark-search || true

docker rmi --force qdrant/vector-db-benchmark:latest || true
docker rmi --force "${VECTOR_DB_BENCHMARK_IMAGE}" || true
fi

echo "Ensure datasets volume exists and contains latest datasets.json"
Expand All @@ -62,7 +73,7 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "upload" ]]; t
--name ci-benchmark-upload \
-v "$HOME/results:/code/results" \
-v "ci-datasets:/code/datasets" \
qdrant/vector-db-benchmark:latest \
"${VECTOR_DB_BENCHMARK_IMAGE}" \
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-search
fi

Expand All @@ -81,23 +92,23 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "search" ]]; t
--name ci-benchmark-search \
-v "$HOME/results:/code/results" \
-v "ci-datasets:/code/datasets" \
qdrant/vector-db-benchmark:latest \
"${VECTOR_DB_BENCHMARK_IMAGE}" \
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload
fi


if [[ "$EXPERIMENT_MODE" == "parallel" ]]; then
echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"

docker pull qdrant/vector-db-benchmark:latest
docker pull "${VECTOR_DB_BENCHMARK_IMAGE}"

echo "Starting ci-benchmark-upload container"
docker run \
--rm \
--name ci-benchmark-upload \
-v "$HOME/results/parallel:/code/results" \
-v "ci-datasets:/code/datasets" \
qdrant/vector-db-benchmark:latest \
"${VECTOR_DB_BENCHMARK_IMAGE}" \
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-search --skip-configure &
UPLOAD_PID=$!

Expand All @@ -107,7 +118,7 @@ if [[ "$EXPERIMENT_MODE" == "parallel" ]]; then
--name ci-benchmark-search \
-v "$HOME/results/parallel:/code/results" \
-v "ci-datasets:/code/datasets" \
qdrant/vector-db-benchmark:latest \
"${VECTOR_DB_BENCHMARK_IMAGE}" \
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload &
SEARCH_PID=$!

Expand Down