Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 37 additions & 38 deletions libs/community/langchain_community/vectorstores/faiss.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
import operator

Check failure on line 4 in libs/community/langchain_community/vectorstores/faiss.py

View workflow job for this annotation

GitHub Actions / cd libs/community / Python 3.11

Ruff (F401)

langchain_community/vectorstores/faiss.py:4:8: F401 `operator` imported but unused
import os
import pickle
import uuid
Expand Down Expand Up @@ -72,6 +72,17 @@
return


def _clamp01(x: float) -> float:
"""
Clamps a float value to the range [0.0, 1.0].

Args:
x: The float value to clamp.
"""
return 0.0 if x < 0.0 else 1.0 if x > 1.0 else x



class FAISS(VectorStore):
"""FAISS vector store integration.

Expand Down Expand Up @@ -142,17 +153,6 @@

* thud [{'bar': 'baz'}]

Search with filter:
.. code-block:: python

results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
for doc in results:
print(f"* {doc.page_content} [{doc.metadata}]")

.. code-block:: python

* thud [{'bar': 'baz'}]

Search with score:
.. code-block:: python

Expand All @@ -174,10 +174,7 @@
# await vector_store.adelete(ids=["3"])

# search
# results = vector_store.asimilarity_search(query="thud",k=1)

# search with score
results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
# results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
for doc,score in results:
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

Expand Down Expand Up @@ -434,19 +431,7 @@
else:
docs.append((doc, scores[0][j]))

score_threshold = kwargs.get("score_threshold")
if score_threshold is not None:
cmp = (
operator.ge
if self.distance_strategy
in (DistanceStrategy.MAX_INNER_PRODUCT, DistanceStrategy.JACCARD)
else operator.le
)
docs = [
(doc, similarity)
for doc, similarity in docs
if cmp(similarity, score_threshold)
]
# NOTE: score_threshold is intentionally not applied to the raw scores here.
return docs[:k]

async def asimilarity_search_with_score_by_vector(
Expand Down Expand Up @@ -570,8 +555,8 @@
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None. If a callable, it must take as input the
metadata dict of Document and return a bool.
Defaults to None. If a callable, it must take as input the
metadata dict of Document and return a bool.

fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
Defaults to 20.
Expand Down Expand Up @@ -1287,8 +1272,6 @@
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and their similarity scores on a scale from 0 to 1."""
# Pop score threshold so that only relevancy scores, not raw scores, are
# filtered.
relevance_score_fn = self._select_relevance_score_fn()
if relevance_score_fn is None:
raise ValueError(
Expand All @@ -1303,9 +1286,18 @@
**kwargs,
)
docs_and_rel_scores = [
(doc, relevance_score_fn(score)) for doc, score in docs_and_scores
(doc, _clamp01(relevance_score_fn(score))) for doc, score in docs_and_scores

Check failure on line 1289 in libs/community/langchain_community/vectorstores/faiss.py

View workflow job for this annotation

GitHub Actions / cd libs/community / Python 3.11

Ruff (E501)

langchain_community/vectorstores/faiss.py:1289:89: E501 Line too long (92 > 88)
]
return docs_and_rel_scores


# Apply score_threshold exactly once, to the normalized relevance scores.
score_threshold = kwargs.pop("score_threshold", None)
if score_threshold is not None:
docs_and_rel_scores = [
(d, s) for (d, s) in docs_and_rel_scores if s >= score_threshold
]

return docs_and_rel_scores[:k]

async def _asimilarity_search_with_relevance_scores(
self,
Expand All @@ -1316,8 +1308,6 @@
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and their similarity scores on a scale from 0 to 1."""
# Pop score threshold so that only relevancy scores, not raw scores, are
# filtered.
relevance_score_fn = self._select_relevance_score_fn()
if relevance_score_fn is None:
raise ValueError(
Expand All @@ -1332,9 +1322,18 @@
**kwargs,
)
docs_and_rel_scores = [
(doc, relevance_score_fn(score)) for doc, score in docs_and_scores
(doc, _clamp01(relevance_score_fn(score))) for doc, score in docs_and_scores
]
return docs_and_rel_scores


# Apply score_threshold exactly once, to the normalized relevance scores.
score_threshold = kwargs.pop("score_threshold", None)
if score_threshold is not None:
docs_and_rel_scores = [
(d, s) for (d, s) in docs_and_rel_scores if s >= score_threshold
]

return docs_and_rel_scores[:k]

@staticmethod
def _create_filter_func(
Expand Down
Loading