Skip to content

Commit 9ae9839

Browse files
Add delete-n-update benchmark (#253)
* Add delete-n-update benchmark * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 4136dbe commit 9ae9839

File tree

15 files changed

+704
-0
lines changed

15 files changed

+704
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Composite action: installs the hcloud CLI, looks up the public IP of the
# named server via tools/hetzner/get_public_ip.sh and writes an Ansible
# inventory to ansible/playbooks/inventory.ini.
name: Create inventory
description: "Prepare inventory.ini"
inputs:
  hcloud_token:
    description: "Hetzner Cloud API token"
    required: true
  server_name:
    description: "Name of the server"
    required: true
  db_host:
    description: "Database host"
    required: true

runs:
  using: "composite"
  steps:
    - name: Create inventory.ini
      shell: bash
      # Inputs are passed as environment variables instead of being expanded
      # into the script text, so their values can never be interpreted as
      # shell syntax (script injection / word splitting).
      env:
        HCLOUD_TOKEN: ${{ inputs.hcloud_token }}
        POSTGRES_HOST: ${{ inputs.db_host }}
        SERVER_NAME: ${{ inputs.server_name }}
      run: |
        set -euo pipefail

        # Download and install hcloud
        HCVERSION=v1.36.0
        wget "https://github.com/hetznercloud/cli/releases/download/${HCVERSION}/hcloud-linux-amd64.tar.gz"
        tar xzf hcloud-linux-amd64.tar.gz
        mv hcloud /usr/local/bin
        chmod +x /usr/local/bin/hcloud

        IP_OF_THE_SERVER=$(bash "tools/hetzner/get_public_ip.sh" "$SERVER_NAME")

        # An empty lookup result would produce an inventory without a usable
        # ansible_host, so stop here with a clear message instead.
        if [ -z "${IP_OF_THE_SERVER}" ]; then
          echo "Could not resolve public IP for server '${SERVER_NAME}'" >&2
          exit 1
        fi

        # Create ansible inventory.ini file
        cat <<EOL > inventory.ini
        [remote_machines]
        benchmark-machine ansible_host=${IP_OF_THE_SERVER} ansible_user=root
        [db_hosts]
        benchmark-db ansible_host=${POSTGRES_HOST} ansible_user=root
        EOL

        mv inventory.ini ansible/playbooks/inventory.ini
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Scheduled workflow: builds an Ansible inventory for the benchmark server and
# runs the HNSW indexing playbook with bench=update.
name: Continuous Benchmark Hnsw Indexing

on:
  workflow_dispatch:
  schedule:
    # Run every day at 05:00 UTC (GitHub evaluates cron schedules in UTC)
    - cron: "0 5 * * *"

# Restrict to only running this workflow one at a time.
# Any new runs will be queued until the previous run is complete.
# Any existing pending runs will be cancelled and replaced with current run.
concurrency:
  group: continuous-benchmark

jobs:
  runUpdateHealingBenchmark:
    runs-on: ubuntu-latest
    container: alpine/ansible:2.18.1
    steps:
      - uses: actions/checkout@v3
      # NOTE(review): the version tag of this action was mangled to
      # "[email protected]" by e-mail obfuscation in the captured page; v0.8.0
      # is an assumption -- confirm against the repository before merging.
      - uses: webfactory/ssh-agent@v0.8.0
        with:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
      - name: Create inventory
        uses: ./.github/workflows/actions/create-inventory
        with:
          hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
          db_host: ${{ secrets.POSTGRES_HOST }}
          server_name: "benchmark-server-3"
      - name: Run bench
        id: hnsw-indexing-update
        run: |
          cd ansible/playbooks && ansible-playbook playbook-hnsw-index.yml --extra-vars "bench=update"

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ NOTES.md
66

77
results/*
88
tools/custom/data.json
9+
10+
ansible/playbooks/inventory.ini

ansible/Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Image used to run the benchmark Ansible playbooks from inside a container.
FROM python:3.11-slim

# Set environment variables to prevent prompts during package installation
ENV DEBIAN_FRONTEND=noninteractive

# openssh-client is installed explicitly: Ansible's ssh connection needs the
# `ssh` binary, and depending on it being pulled in indirectly (it appears to
# arrive only as a recommended package of git) is fragile.
RUN apt-get update && apt-get install -y \
    git \
    openssh-client \
    sshpass \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir ansible

# Fail the build early if the Ansible installation is not usable.
RUN ansible --version

WORKDIR /ansible/playbooks

# Harmless default; callers are expected to pass the real ansible-playbook
# command line to `docker run`.
CMD ["ansible-playbook", "--version"]

ansible/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
## How to run
2+
3+
### Prerequisites
4+
* SSH keys (to connect to the remote machines)
5+
* inventory.ini (to define the actual machine on which the benchmark is run)
6+
7+
Add inventory.ini in [ansible/playbooks/](playbooks) with the following content:
8+
```ini
9+
[remote_machines]
10+
;note that machine's name should be benchmark-machine
11+
benchmark-machine ansible_host=${YOUR_SERVER_IP} ansible_user=${YOUR_USER}
12+
[db_hosts]
13+
benchmark-db ansible_host=${YOUR_DB_SERVER_IP} ansible_user=${YOUR_DB_SERVER_USER}
14+
```
15+
16+
### Run ansible inside Docker
17+
Ensure the ssh keys are properly mounted into the container.
18+
19+
Run the following commands from [ansible](.):
20+
```bash
21+
docker buildx build --tag vector-db-benchmark-ansible:latest -f Dockerfile .
22+
docker run --rm -it -v ~/.ssh/id_rsa:/root/.ssh/id_rsa -v ~/.ssh/id_rsa.pub:/root/.ssh/id_rsa.pub -v ./playbooks:/ansible/playbooks vector-db-benchmark-ansible ansible-playbook playbook-hnsw-index.yml --extra-vars "bench=update"
23+
```
24+
25+
### Run ansible locally
26+
The "local" run here means that the ansible command is run locally (so, Ansible should be installed locally).
27+
The actual machine on which the benchmark is run is defined by the inventory file (see Prerequisites).
28+
29+
Run the following command from [ansible/playbooks](playbooks):
30+
```bash
31+
ansible-playbook playbook-hnsw-index.yml --extra-vars "bench=update"
32+
```
33+
34+
### Run ansible and benchmark locally
35+
The "local" run here means that the ansible command is run locally (so, Ansible should be installed locally) and the benchmark is run on the local machine.
36+
In [ansible/playbooks/](playbooks) add a file `inventory.ini` with the following content:
37+
```ini
38+
[remote_machines]
39+
;note that machine's name should be benchmark-machine
40+
benchmark-machine ansible_connection=local ansible_user=${YOUR_USER} ansible_become=false
41+
[db_hosts]
42+
benchmark-db ansible_host=${YOUR_DB_SERVER_IP} ansible_user=${YOUR_DB_SERVER_USER}
43+
```
44+
45+
Then from [ansible/playbooks](playbooks) run:
46+
```bash
47+
ansible-playbook playbook-hnsw-index.yml --extra-vars "bench=update"
48+
```

ansible/playbooks/ansible.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[defaults]
2+
host_key_checking = False
3+
inventory = inventory.ini
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Compose definition of the Qdrant server container used by the benchmark.
version: '3.7'

services:
  qdrant_bench:
    # CONTAINER_REGISTRY falls back to docker.io. QDRANT_VERSION has no
    # default, so fail with an explicit message instead of producing the
    # invalid image reference "qdrant/qdrant:" when it is unset or empty.
    image: "${CONTAINER_REGISTRY:-docker.io}/qdrant/qdrant:${QDRANT_VERSION:?QDRANT_VERSION must be set}"
    container_name: qdrant-continuous
    ports:
      - "6333:6333"
      - "6334:6334"
    logging:
      driver: "json-file"
      options:
        # Quoted so the values stay strings regardless of YAML type inference.
        max-file: "1"
        max-size: "10m"
    deploy:
      resources:
        limits:
          memory: 25Gb
    environment:
      - QDRANT__LOG_LEVEL=DEBUG
      # Feature flags stay off unless the caller exports the variable.
      - QDRANT__FEATURE_FLAGS__ALL=${QDRANT__FEATURE_FLAGS__ALL:-false}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
numpy>=2.2.5
2+
requests>=2.32.3
3+
tqdm>=4.67.1
4+
qdrant-client>=1.14.0
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
# Benchmark settings.
# NOTE(review): presumably this is the vars file that the HNSW indexing
# playbook loads with include_vars -- confirm the file path.
qdrant_python_client_version: '1.14.0'

# Directories for logs and experiment artefacts.
logging_dir: '/tmp/logs'
working_dir: '/tmp/experiments'

# Dataset to benchmark against.
dataset_url: 'https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_100K.tgz'
dataset_name: 'dbpedia_openai_100K'
dataset_dim: '1536'

# Server builds under test; each entry names the registry, image and tag,
# and whether feature flags are enabled. Values are kept as strings.
servers:
  - name: 'qdrant'
    registry: 'ghcr.io'
    image: 'qdrant/qdrant'
    version: 'dev'
    feature_flags: 'true'
  - name: 'qdrant'
    registry: 'docker.io'
    image: 'qdrant/qdrant'
    version: 'master'
    feature_flags: 'false'
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
---
# Play 1: runs the selected HNSW indexing benchmark role once per entry in
# `servers`, then runs get_score.py and stores its stdout as the
# `precision_score` fact.
- name: Run load
  hosts: remote_machines
  become: true
  tasks:
    # `bench` normally comes from --extra-vars. Resolving the fallback once
    # keeps every later reference consistent: previously some references had
    # no default and failed when the variable was not passed.
    - name: Default the benchmark name when it is not passed
      ansible.builtin.set_fact:
        bench: "{{ bench | default('update') }}"

    - name: Load common variables
      ansible.builtin.include_vars: "group_vars/hnsw-indexing-{{ bench }}.yml"

    # Passing the list in one call lets the package manager resolve both
    # packages in a single transaction instead of once per loop item.
    - name: Ensure necessary packages are installed
      ansible.builtin.package:
        name:
          - wget
          - python3-venv
        state: present

    # Probe only: rc 127 (command not found) is tolerated as before, and the
    # task never reports a change because it modifies nothing.
    - name: Ensure Docker is installed
      ansible.builtin.command: docker --version
      register: docker_check
      failed_when: docker_check.rc not in [0, 127]
      changed_when: false

    # `bench` is no longer re-declared in `vars`: a variable defined in terms
    # of itself can trigger a recursive-template error, and the role already
    # sees the fact set at the top of this play.
    - name: Execute benchmark
      ansible.builtin.include_role:
        name: "run-hnsw-indexing-{{ bench }}"
      loop: "{{ servers }}"
      loop_control:
        loop_var: "server"
      vars:
        server_name: "{{ server.name }}"
        server_version: "{{ server.version }}"
        server_registry: "{{ server.registry }}"
        server_feature_flags: "{{ server.feature_flags }}"

    # The comparison is hard-wired to the first two entries of `servers`.
    - name: "Compare results on the remote machine"
      ansible.builtin.shell: |
        python3 "${WORK_DIR}/get_score.py"
      environment:
        WORK_DIR: "{{ working_dir }}"
        DATA_DIR: "{{ working_dir }}"
        BENCH: "{{ bench }}"
        SERVER_NAME: "{{ servers[0].name }}"
        SERVER_NAME_2: "{{ servers[1].name }}"
        SERVER_VERSION: "{{ servers[0].version }}"
        SERVER_VERSION_2: "{{ servers[1].version }}"
      register: "score_result"

    # The second play reads this value through
    # hostvars['benchmark-machine']['precision_score'].
    - name: Extract precision_score and set fact
      ansible.builtin.set_fact:
        precision_score: "{{ score_result.stdout }}"
51+
52+
# Play 2: for every entry in `servers`, extracts that engine's score and
# indexing time from the precision_score fact recorded on benchmark-machine
# and inserts one row into the hnsw_indexing table.
- name: Export data into postgres
  hosts: db_hosts
  tasks:
    # `bench` normally comes from --extra-vars; resolve the fallback once so
    # the vars file name and the inserted bench_name always agree.
    - name: Default the benchmark name when it is not passed
      ansible.builtin.set_fact:
        bench: "{{ bench | default('update') }}"

    - name: Load common variables
      ansible.builtin.include_vars: "group_vars/hnsw-indexing-{{ bench }}.yml"

    # The result string is searched for "<name>-<version>_score=<value>" and
    # "<name>-<version>_indexing_time=<value>"; each value ends at the next
    # comma. `docker exec` runs without -it because Ansible's shell is
    # non-interactive and requesting a TTY can fail with
    # "the input device is not a TTY".
    - name: Insert data into table
      ansible.builtin.shell: |
        result="{{ hostvars['benchmark-machine']['precision_score'] }}"
        engine="{{ server.name }}-{{ server.version }}"
        score=$(echo "$result" | grep -oP "${engine}_score=\K[^,]+")
        indexing_time_s=$(echo "$result" | grep -oP "${engine}_indexing_time=\K[^,]+")

        MEASURE_TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

        pg_query="INSERT INTO hnsw_indexing (
          engine,
          engine_version,
          dataset,
          measure_timestamp,
          bench_name,
          precision_score,
          indexing_time_s
        ) VALUES (
          '{{ server.name }}',
          '{{ server.version }}',
          '{{ dataset_name }}',
          '${MEASURE_TIMESTAMP}',
          '{{ bench }}',
          '${score}',
          '${indexing_time_s}'
        );"
        docker exec qdrant-postgres psql -U qdrant -d postgres -c "${pg_query}"
      loop: "{{ servers }}"
      loop_control:
        loop_var: "server"

0 commit comments

Comments
 (0)