Skip to content

Commit fec61b5

Browse files
committed
add changes for running cohere wiki benchmark
1 parent 9bb99ab commit fec61b5

File tree

6 files changed: +35 -7 lines changed

6 files changed: +35 -7 lines changed

datasets/datasets.json

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -354,5 +354,13 @@
354354
"type": "tar",
355355
"link": "https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_keywords_1m_vocab_10_no_filters.tgz",
356356
"path": "random-100-match-kw-small-vocab/random_keywords_1m_vocab_10_no_filters"
357+
},
358+
{
359+
"name": "cohere-wiki-50m-test-only",
360+
"vector_size": 768,
361+
"distance": "cosine",
362+
"type": "tar",
363+
"link": "https://storage.googleapis.com/ann-filtered-benchmark/datasets/cohere-wiki-50m-test-only.tgz",
364+
"path": "cohere-wiki-50m/cohere_wiki_50m"
357365
}
358366
]

engine/clients/qdrant/config.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
import os
22

33
QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "benchmark")
4+
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", None)

engine/clients/qdrant/configure.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from benchmark.dataset import Dataset
55
from engine.base_client.configure import BaseConfigurator
66
from engine.base_client.distances import Distance
7-
from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME
7+
from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME, QDRANT_API_KEY
88

99

1010
class QdrantConfigurator(BaseConfigurator):
@@ -32,7 +32,7 @@ class QdrantConfigurator(BaseConfigurator):
3232
def __init__(self, host, collection_params: dict, connection_params: dict):
3333
super().__init__(host, collection_params, connection_params)
3434

35-
self.client = QdrantClient(host=host, **connection_params)
35+
self.client = QdrantClient(url=host, api_key=QDRANT_API_KEY, **connection_params)
3636

3737
def clean(self):
3838
self.client.delete_collection(collection_name=QDRANT_COLLECTION_NAME)

engine/clients/qdrant/search.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
from dataset_reader.base_reader import Query
1010
from engine.base_client.search import BaseSearcher
11-
from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME
11+
from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME, QDRANT_API_KEY
1212
from engine.clients.qdrant.parser import QdrantConditionParser
1313

1414

@@ -22,8 +22,9 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic
2222
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "true"
2323
os.environ["GRPC_POLL_STRATEGY"] = "epoll,poll"
2424
cls.client: QdrantClient = QdrantClient(
25-
host,
25+
url=host,
2626
prefer_grpc=True,
27+
api_key=QDRANT_API_KEY,
2728
limits=httpx.Limits(max_connections=None, max_keepalive_connections=0),
2829
**connection_params,
2930
)

engine/clients/qdrant/upload.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313

1414
from dataset_reader.base_reader import Record
1515
from engine.base_client.upload import BaseUploader
16-
from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME
16+
from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME, QDRANT_API_KEY
1717

1818

1919
class QdrantUploader(BaseUploader):
@@ -24,7 +24,7 @@ class QdrantUploader(BaseUploader):
2424
def init_client(cls, host, distance, connection_params, upload_params):
2525
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "true"
2626
os.environ["GRPC_POLL_STRATEGY"] = "epoll,poll"
27-
cls.client = QdrantClient(host=host, prefer_grpc=True, **connection_params)
27+
cls.client = QdrantClient(url=host, prefer_grpc=True, api_key=QDRANT_API_KEY, **connection_params)
2828
cls.upload_params = upload_params
2929

3030
@classmethod

experiments/configurations/qdrant-single-node.json

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,25 @@
3939
"config": {
4040
"hnsw_ef": 256,
4141
"quantization": {
42-
"oversampling": 2.0
42+
"oversampling": 2.0,
43+
"rescore": true
44+
}
45+
}
46+
}
47+
],
48+
"upload_params": { "parallel": 16, "batch_size": 1024 }
49+
},
50+
{
51+
"name": "qdrant-rescore-only",
52+
"engine": "qdrant",
53+
"connection_params": { "timeout": 30 },
54+
"search_params": [
55+
{
56+
"parallel": 8,
57+
"config": {
58+
"hnsw_ef": 128,
59+
"quantization": {
60+
"rescore": true
4361
}
4462
}
4563
}

0 commit comments

Comments
 (0)