Skip to content

Commit 9849594

Browse files
Run ci with payload (#231)
* Add with_payload param
* Update poetry
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  For more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a7e51fa commit 9849594

File tree

7 files changed

+494
-141
lines changed

7 files changed

+494
-141
lines changed

.github/workflows/continuous-benchmark.yaml

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ name: Continuous Benchmark
33
on:
44
repository_dispatch:
55
workflow_dispatch:
6+
inputs:
7+
with_payload:
8+
description: 'Flag that controls whether to search with or without payload (false or true)'
9+
default: false
610
schedule:
711
# Run every 4 hours
812
- cron: "0 */4 * * *"
@@ -27,13 +31,16 @@ jobs:
2731
export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
2832
export POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
2933
export POSTGRES_HOST=${{ secrets.POSTGRES_HOST }}
34+
export WITH_PAYLOAD=${{ inputs.with_payload }}
35+
export SUFFIX=$([ "${WITH_PAYLOAD}" = "true" ] && echo "-with-payload" || echo "")
36+
3037
bash -x tools/setup_ci.sh
3138
3239
declare -A DATASET_TO_ENGINE
33-
DATASET_TO_ENGINE["laion-small-clip"]="qdrant-continuous-benchmark"
34-
DATASET_TO_ENGINE["msmarco-sparse-100K"]="qdrant-sparse-vector"
35-
DATASET_TO_ENGINE["h-and-m-2048-angular-filters"]="qdrant-continuous-benchmark"
36-
DATASET_TO_ENGINE["dbpedia-openai-100K-1536-angular"]="qdrant-bq-continuous-benchmark"
40+
DATASET_TO_ENGINE["laion-small-clip"]="qdrant-continuous-benchmark${SUFFIX}"
41+
DATASET_TO_ENGINE["msmarco-sparse-100K"]="qdrant-sparse-vector${SUFFIX}"
42+
DATASET_TO_ENGINE["h-and-m-2048-angular-filters"]="qdrant-continuous-benchmark${SUFFIX}"
43+
DATASET_TO_ENGINE["dbpedia-openai-100K-1536-angular"]="qdrant-bq-continuous-benchmark${SUFFIX}"
3744
3845
set +e
3946
@@ -117,13 +124,16 @@ jobs:
117124
export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
118125
export POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
119126
export POSTGRES_HOST=${{ secrets.POSTGRES_HOST }}
127+
export WITH_PAYLOAD=${{ inputs.with_payload }}
128+
export SUFFIX=$([ "${WITH_PAYLOAD}" = "true" ] && echo "-with-payload" || echo "")
129+
120130
bash -x tools/setup_ci.sh
121131
122132
set +e
123133
124134
# Benchmark filtered search by tenants with mem limitation
125135
126-
export ENGINE_NAME="qdrant-all-on-disk-scalar-q"
136+
export ENGINE_NAME="qdrant-all-on-disk-scalar-q${SUFFIX}"
127137
export DATASETS="random-768-100-tenants"
128138
export BENCHMARK_STRATEGY="tenants"
129139
export CONTAINER_MEM_LIMIT=160mb
@@ -203,13 +213,16 @@ jobs:
203213
export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
204214
export POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
205215
export POSTGRES_HOST=${{ secrets.POSTGRES_HOST }}
216+
export WITH_PAYLOAD=${{ inputs.with_payload }}
217+
export SUFFIX=$([ "${WITH_PAYLOAD}" = "true" ] && echo "-with-payload" || echo "")
218+
206219
bash -x tools/setup_ci.sh
207220
208221
set +e
209222
210223
# Benchmark parallel search&upload
211224
212-
export ENGINE_NAME="qdrant-continuous-benchmark"
225+
export ENGINE_NAME="qdrant-continuous-benchmark${SUFFIX}"
213226
export DATASETS="laion-small-clip"
214227
export BENCHMARK_STRATEGY="parallel"
215228
export POSTGRES_TABLE="benchmark_parallel_search_upload"

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ ENV PYTHONFAULTHANDLER=1 \
66
PIP_NO_CACHE_DIR=off \
77
PIP_DISABLE_PIP_VERSION_CHECK=on \
88
PIP_DEFAULT_TIMEOUT=100 \
9-
POETRY_VERSION=1.5.1
9+
POETRY_VERSION=2.1.2
1010

1111
RUN pip install "poetry==$POETRY_VERSION"
1212

@@ -16,7 +16,7 @@ COPY poetry.lock pyproject.toml /code/
1616

1717
# Project initialization:
1818
RUN poetry config virtualenvs.create false \
19-
&& poetry install --no-dev --no-interaction --no-ansi
19+
&& poetry --no-interaction --no-ansi install --without dev
2020

2121
# Creating folders, and files for a project:
2222
COPY . /code

engine/clients/qdrant/search.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]:
5454
query_filter=cls.parser.parse(query.meta_conditions),
5555
limit=top,
5656
search_params=rest.SearchParams(**cls.search_params.get("config", {})),
57+
with_payload=cls.search_params.get("with_payload", False),
5758
)
5859
except Exception as ex:
5960
print(f"Something went wrong during search: {ex}")

experiments/configurations/qdrant-on-disk.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,24 @@
3030
{ "parallel": 8 }
3131
],
3232
"upload_params": { "parallel": 4 }
33+
},
34+
{
35+
"name": "qdrant-all-on-disk-scalar-q-with-payload",
36+
"engine": "qdrant",
37+
"connection_params": {},
38+
"collection_params": {
39+
"optimizers_config": { "default_segment_number": 17 },
40+
"quantization_config": { "scalar": {"type": "int8", "quantile": 0.99, "always_ram": false} },
41+
"vectors_config": { "on_disk": true },
42+
"hnsw_config": { "on_disk": true, "m": 0, "payload_m": 16 },
43+
"on_disk_payload": true,
44+
"payload_index_params": {
45+
"a": { "is_tenant": true, "on_disk": true }
46+
}
47+
},
48+
"search_params": [
49+
{ "parallel": 8, "with_payload": true }
50+
],
51+
"upload_params": { "parallel": 4 }
3352
}
3453
]

experiments/configurations/qdrant-single-node.json

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,41 @@
4545
],
4646
"upload_params": { "parallel": 16, "batch_size": 1024 }
4747
},
48+
{
49+
"name": "qdrant-continuous-benchmark-with-payload",
50+
"engine": "qdrant",
51+
"connection_params": { "timeout": 30 },
52+
"collection_params": {
53+
"hnsw_config": {
54+
"m": 32,
55+
"ef_construct": 256
56+
},
57+
"quantization_config": {
58+
"scalar": {
59+
"type": "int8",
60+
"quantile": 0.99
61+
}
62+
},
63+
"optimizers_config": {
64+
"max_segment_size": 1000000,
65+
"default_segment_number": 3,
66+
"memmap_threshold": 10000000
67+
}
68+
},
69+
"search_params": [
70+
{
71+
"parallel": 8,
72+
"config": {
73+
"hnsw_ef": 256,
74+
"quantization": {
75+
"oversampling": 2.0
76+
}
77+
},
78+
"with_payload": true
79+
}
80+
],
81+
"upload_params": { "parallel": 16, "batch_size": 1024 }
82+
},
4883
{
4984
"name": "qdrant-bq-continuous-benchmark",
5085
"engine": "qdrant",
@@ -72,6 +107,34 @@
72107
],
73108
"upload_params": { "parallel": 16, "batch_size": 1024 }
74109
},
110+
{
111+
"name": "qdrant-bq-continuous-benchmark-with-payload",
112+
"engine": "qdrant",
113+
"connection_params": { "timeout": 30 },
114+
"collection_params": {
115+
"hnsw_config": {
116+
"m": 32,
117+
"ef_construct": 256
118+
},
119+
"quantization_config": { "binary": {"always_ram": true} },
120+
"optimizers_config": {
121+
"max_segment_size": 1000000,
122+
"default_segment_number": 3,
123+
"memmap_threshold": 10000000
124+
}
125+
},
126+
"search_params": [
127+
{
128+
"parallel": 8,
129+
"config": {
130+
"hnsw_ef": 256,
131+
"quantization": { "rescore": true, "oversampling": 2.0 }
132+
},
133+
"with_payload": true
134+
}
135+
],
136+
"upload_params": { "parallel": 16, "batch_size": 1024 }
137+
},
75138
{
76139
"name": "qdrant-sparse-vector",
77140
"engine": "qdrant",
@@ -91,6 +154,26 @@
91154
],
92155
"upload_params": { "parallel": 16, "batch_size": 1024 }
93156
},
157+
{
158+
"name": "qdrant-sparse-vector-with-payload",
159+
"engine": "qdrant",
160+
"connection_params": { "timeout": 30 },
161+
"collection_params": {
162+
"optimizers_config": {
163+
"max_segment_size": 1000000,
164+
"default_segment_number": 3,
165+
"memmap_threshold": 10000000
166+
}
167+
},
168+
"search_params": [
169+
{
170+
"parallel": 8,
171+
"search_params": {},
172+
"with_payload": true
173+
}
174+
],
175+
"upload_params": { "parallel": 16, "batch_size": 1024 }
176+
},
94177
{
95178
"name": "qdrant-parallel",
96179
"engine": "qdrant",

0 commit comments

Comments
 (0)