Metadata filtering with BM25 Retriever #13978
Replies: 3 comments 2 replies
-
Hello, @VijayKumaraswamy! I'm here to help you with any issues you're facing. Let's tackle this problem together! To resolve the "Please pass exactly one of index, nodes, or docstore" error and correctly add metadata filters to both retrievers in your code, you need to ensure that you are passing exactly one of the required parameters (`index`, `nodes`, or `docstore`) to `BM25Retriever.from_defaults`. Here is an updated version of your code with the necessary changes:
import os
import os

import openai
import chromadb
import nest_asyncio
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.vector_stores import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.vector_stores.chroma import ChromaVectorStore

# NOTE(review): never hard-code real API keys in source; read an already-set
# environment variable or a secrets manager instead. The literal below is a
# placeholder from the discussion.
os.environ["OPENAI_API_KEY"] = "sk-lvksfbvhskvbdksvbksdfvbkfdsvhkb"
openai.api_key = os.environ["OPENAI_API_KEY"]

# Allow nested event loops (QueryFusionRetriever below runs with
# use_async=True, which needs this inside a notebook). Apply it up front,
# before any async retrieval can run.
nest_asyncio.apply()

# Connect to the remote Chroma instance that holds the pre-built collection.
chroma_client = chromadb.HttpClient(
    host='ab7a73436a8c04c269e8d27e651d3fea-389708969.ap-south-1.elb.amazonaws.com',
    port=8000,
)

# Query-time embedding model — must match the model used when the collection
# was indexed.
embed_model = HuggingFaceEmbedding(model_name="all-MiniLM-L6-v2")

# LLM used by QueryFusionRetriever for query generation.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Metadata filter: keep only nodes whose "SAMSUNG" metadata key equals this
# date string.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="SAMSUNG", value="10-10-2019"),
    ]
)

# Wrap the existing Chroma collection as a LlamaIndex vector store and index.
chroma_collection = chroma_client.get_collection(
    "bank_mandate_forms_with_company_date_to_keys_summary_in_content"
)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
    embed_model=embed_model,
)

# The dense retriever supports metadata filters natively.
vector_retriever = index.as_retriever(similarity_top_k=2, filters=filters)

# BM25 has no filter support, so pre-filter instead: retrieve the nodes that
# pass the metadata filter once (blank query), then build BM25 over that
# subset only.
source_nodes = vector_retriever.retrieve(' ')
nodes = [x.node for x in source_nodes]

# BUG FIX: BM25Retriever.from_defaults() validates its inputs with
#   sum(bool(val) for val in [index, nodes, docstore]) != 1
# so an *empty* node list is indistinguishable from "nodes not passed" and
# raises "Please pass exactly one of index, nodes, or docstore." — the exact
# error reported in this thread. Fail early with an actionable message.
if not nodes:
    raise ValueError(
        "The metadata filter matched no nodes; BM25Retriever needs a "
        "non-empty node list. Check the filter key/value against the "
        "collection's metadata."
    )
bm25_retriever = BM25Retriever.from_defaults(nodes=nodes)

# Fuse both retrievers with reciprocal-rank fusion.
retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=2,
    num_queries=4,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    # query_gen_prompt="...",  # we could override the query generation prompt here
)
nest_asyncio.apply()
In this code, the vector retriever applies the metadata filters directly, and the BM25 retriever is built only from the nodes returned under those filters, so both retrievers respect the filtering.
This should resolve the error and correctly apply metadata filters to both retrievers [1]. |
Beta Was this translation helpful? Give feedback.
-
Thank you so much for the response. What does the line `source_nodes = vector_retriever.retrieve(' ')` do? Can you please elaborate on it for me? |
Beta Was this translation helpful? Give feedback.
-
The updated code is still giving the same error ValueError Traceback (most recent call last) File c:\Users\VijayKumaraswamyLagu\Documents\RAG\Gen_AI_env\Lib\site-packages\llama_index\retrievers\bm25\base.py:60, in BM25Retriever.from_defaults(cls, index, nodes, docstore, tokenizer, similarity_top_k, verbose) ValueError: Please pass exactly one of index, nodes, or docstore. |
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
import os

import openai
import chromadb
import nest_asyncio
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.vector_stores import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.vector_stores.chroma import ChromaVectorStore

# NOTE(review): placeholder credential — never commit a real API key.
os.environ["OPENAI_API_KEY"] = "sk-lvksfbvhskvbdksvbksdfvbkfdsvhkb"
openai.api_key = os.environ["OPENAI_API_KEY"]

chroma_client = chromadb.HttpClient(host='gffjd', port=8000)

# loads BAAI/bge-small-en-v1.5
# (FIX: the line above was missing its leading "#" and made the script a
# SyntaxError as pasted; note the model actually loaded is all-MiniLM-L6-v2.)
embed_model = HuggingFaceEmbedding(model_name="all-MiniLM-L6-v2")

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Keep only nodes whose "SAMSUNG" metadata key equals this date string.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="SAMSUNG", value="10-10-2019"),
    ]
)

chroma_collection = chroma_client.get_collection(
    "bank_mandate_forms_with_company_date_to_keys_summary_in_content"
)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
    embed_model=embed_model,
)

# Dense retriever applies the metadata filters natively.
vector_retriever = index.as_retriever(similarity_top_k=2, filters=filters)

# BM25 cannot filter, so pre-filter: fetch up to 100 filtered nodes with a
# blank query and build the BM25 index over that subset only.
source_nodes = index.as_retriever(similarity_top_k=100, filters=filters).retrieve(' ')
nodes = [x.node for x in source_nodes]

# Guard: an empty node list makes BM25Retriever.from_defaults() raise
# "Please pass exactly one of index, nodes, or docstore." because its check
# uses bool(nodes). Fail with a clearer message instead.
if not nodes:
    raise ValueError(
        "Metadata filter matched no nodes; cannot build a BM25Retriever."
    )
bm25_retriever = BM25Retriever.from_defaults(nodes=nodes)

retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=2,
    num_queries=4,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    # query_gen_prompt="...",  # we could override the query generation prompt here
)

# apply nested async to run in a notebook
# (FIX: the comment above had also lost its leading "#".)
nest_asyncio.apply()
In the above code, I am trying to add metadata filters to both retrievers. While doing that, I am getting this error — can you help me with it?
{
"name": "ValueError",
"message": "Please pass exactly one of index, nodes, or docstore.",
"stack": "---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[13], line 7
5 source_nodes = index.as_retriever(filters=filters).retrieve(' ')
6 nodes = [x.node for x in source_nodes]
----> 7 bm25_retriever = BM25Retriever.from_defaults(nodes=nodes)
File c:\Users\VijayKumaraswamyLagu\Documents\RAG\Gen_AI_env\Lib\site-packages\llama_index\retrievers\bm25\base.py:60, in BM25Retriever.from_defaults(cls, index, nodes, docstore, tokenizer, similarity_top_k, verbose)
48 @classmethod
49 def from_defaults(
50 cls,
(...)
57 ) -> "BM25Retriever":
58 # ensure only one of index, nodes, or docstore is passed
59 if sum(bool(val) for val in [index, nodes, docstore]) != 1:
---> 60 raise ValueError("Please pass exactly one of index, nodes, or docstore.")
62 if index is not None:
63 docstore = index.docstore
ValueError: Please pass exactly one of index, nodes, or docstore."
}
Beta Was this translation helpful? Give feedback.
All reactions