Skip to content

Commit ea73883

Browse files
feat: make the score in VectorStoreResult consistent (#482)
1 parent 454c18a commit ea73883

File tree

22 files changed

+262
-79
lines changed

22 files changed

+262
-79
lines changed

examples/document-search/chroma.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ async def main() -> None:
8484
index_name="jokes",
8585
default_options=VectorStoreOptions(
8686
k=10,
87-
max_distance=0.22,
87+
score_threshold=0.88,
8888
),
8989
embedder=embedder,
9090
)
@@ -103,7 +103,7 @@ async def main() -> None:
103103
query = "I'm boiling my water and I need a joke"
104104
vector_store_kwargs = {
105105
"k": 2,
106-
"max_distance": 0.6,
106+
"score_threshold": 0.4,
107107
}
108108
results = await document_search.search(
109109
query,

examples/document-search/configurable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class to rephrase the query.
7373
"distance_method": "l2",
7474
"default_options": {
7575
"k": 3,
76-
"max_distance": 1.2,
76+
"score_threshold": -1.2,
7777
},
7878
"embedder": {
7979
"type": "ragbits.core.embeddings.litellm:LiteLLMEmbedder",

examples/document-search/otel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ async def main() -> None:
121121
query = "I'm boiling my water and I need a joke"
122122
vector_store_kwargs = {
123123
"k": 2,
124-
"max_distance": None,
124+
"score_threshold": None,
125125
}
126126
results = await document_search.search(
127127
query,

examples/document-search/qdrant.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ async def main() -> None:
9494
query = "I'm boiling my water and I need a joke"
9595
vector_store_kwargs = {
9696
"k": 2,
97-
"max_distance": 0.6,
97+
"score_threshold": 0.6,
9898
}
9999
results = await document_search.search(
100100
query,

examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ config:
66
distance_method: l2
77
default_options:
88
k: 3
9-
max_distance: 1.2
9+
score_threshold: -1.2
1010
embedder:
1111
type: ragbits.core.embeddings.litellm:LiteLLMEmbedder
1212
config:

examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma_optimization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ config:
66
distance_method: l2
77
default_options:
88
k: 3
9-
max_distance: 1.2
9+
score_threshold: -1.2
1010
embedder:
1111
type: ragbits.core.embeddings.litellm:LiteLLMEmbedder
1212
config:

examples/evaluation/document-search/basic/evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"distance_method": "l2",
3636
"default_options": {
3737
"k": 3,
38-
"max_distance": 1.2,
38+
"score_threshold": -1.2,
3939
},
4040
"embedder": {
4141
"type": "ragbits.core.embeddings.litellm:LiteLLMEmbedder",

packages/ragbits-core/CHANGELOG.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# CHANGELOG
22

33
## Unreleased
4-
4+
- Make the score in VectorStoreResult consistent (always bigger is better) (#482)
55
- Add router option to LiteLLMEmbedder (#440)
66
- Fix: make unflatten_dict symmetric to flatten_dict (#461)
77

@@ -11,7 +11,6 @@
1111
- Fix Qdrant vector store serialization (#419)
1212

1313
## 0.11.0 (2025-03-25)
14-
1514
- Add HybridSearchVectorStore which can aggregate results from multiple VectorStores (#412)
1615

1716
## 0.10.2 (2025-03-21)

packages/ragbits-core/src/ragbits/core/vector_stores/_cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async def run() -> None:
101101
def query(
102102
text: Annotated[str, typer.Argument(help="Text to query the vector store with")],
103103
k: Annotated[int, typer.Option(help="Number of entries to retrieve")] = 5,
104-
max_distance: Annotated[float | None, typer.Option(help="Maximum distance to the query vector")] = None,
104+
score_threshold: Annotated[float | None, typer.Option(help="Minimum score for result to be returned")] = None,
105105
columns: Annotated[
106106
str,
107107
typer.Option(
@@ -117,7 +117,7 @@ def query(
117117
async def run() -> None:
118118
if state.vector_store is None:
119119
raise ValueError("Vector store not initialized")
120-
options = VectorStoreOptions(k=k, max_distance=max_distance)
120+
options = VectorStoreOptions(k=k, score_threshold=score_threshold)
121121

122122
entries = await state.vector_store.retrieve(
123123
text=text,

packages/ragbits-core/src/ragbits/core/vector_stores/base.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ class VectorStoreResult(BaseModel):
4444
"""
4545
An object representing a query result from a vector store.
4646
Contains the entry, its vector, and the similarity score.
47+
48+
The bigger the score, the more similar the entry is to the query.
49+
This holds even when using metrics that naturally follow the
50+
opposite convention (e.g. Euclidean distance). In this case,
51+
the score is simply the negative of the distance.
4752
"""
4853

4954
entry: VectorStoreEntry
@@ -54,10 +59,17 @@ class VectorStoreResult(BaseModel):
5459
class VectorStoreOptions(Options):
5560
"""
5661
An object representing the options for the vector store.
62+
63+
Attributes:
64+
k: The number of entries to return.
65+
score_threshold: The minimum similarity score for an entry to be returned.
66+
Note that the threshold is compared against the score (bigger is always better),
67+
which may differ from the raw metric used by the vector store, e.g. a distance (see `VectorStoreResult`
68+
for more details).
5769
"""
5870

5971
k: int = 5
60-
max_distance: float | None = None
72+
score_threshold: float | None = None
6173

6274

6375
VectorStoreOptionsT = TypeVar("VectorStoreOptionsT", bound=VectorStoreOptions)

0 commit comments

Comments
 (0)