Implement RAPTOR #15471
Replies: 6 comments 6 replies
-
@dosu how do I implement this while making sure that I'm able to add more documents in the future if needed? I have the following initialisation for my class: self.redis_client = redis.from_url(
self.config.get("REDIS_VECTOR_STORE")
)
self.docstore = RedisDocumentStore.from_redis_client(
redis_client=self.redis_client, namespace=self.namespace
)
# TODO: UNcomment to use redis vector store
self.vector_store = RedisVectorStore(
index_name=self.namespace,
redis_url=self.config.get("REDIS_VECTOR_STORE"),
metadata_fields=["namespace"],
)
self.storage_context = StorageContext.from_defaults(
docstore=self.docstore,
index_store=RedisIndexStore.from_redis_client(
redis_client=self.redis_client, namespace=self.namespace
),
vector_store=self.vector_store,
)
try:
self.base_index = load_index_from_storage(self.storage_context)
print("[INFO] Index found at storage")
except ValueError as e:
print("[INFO] No index found at storage")
self.base_index = VectorStoreIndex(
nodes=[],
storage_context=self.storage_context,
store_nodes_override=True,
)
self.base_retriever = self.base_index.as_retriever(
similarity_top_k=self.similarity_top_k,
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
),
)
self.retriever = AutoMergingRetriever(
self.base_retriever, self.storage_context, verbose=verbose
)
self.recency_postprocessor = FixedRecencyPostprocessor(
top_k=recency, date_key="creation_date"
)
self.postprocessor = SentenceTransformerRerank(
model=self.config.get("RAG_RERANK_MODEL"),
top_n=similarity_top_k_reranker,
)
self.llm = OpenAI(
model=self.config.get("LLM_ANSWERING_MODEL"),
api_key=self.config.get("OPENAI_API_KEY"),
)
self.response_synthesizer = get_response_synthesizer(
response_mode=ResponseMode.COMPACT, llm=self.llm
) I also have the following helper functions: def create_document(self, text, filename):
documents = []
for idx, page in text.items():
document = Document(text=page)
current_date = datetime.now().strftime("%Y-%m-%d")
document.metadata = {
"filename": filename,
"page_number": idx,
"creation_date": current_date,
"last_accessed_date": current_date,
"last_modified_date": current_date,
"namespace": self.namespace,
}
documents.append(document)
return documents
def return_nodes(self, docs):
    """Parse documents into nodes with the instance's node parser.

    Args:
        docs: Documents passed to
            ``self.node_parser.get_nodes_from_documents``.

    Returns:
        The value returned by the node parser for ``docs``.
    """
    return self.node_parser.get_nodes_from_documents(docs)
def add_nodes_to_doc_store(self, all_nodes):
    """Store the given nodes in ``self.docstore``.

    Args:
        all_nodes: Nodes handed to ``self.docstore.add_documents``.

    Raises:
        Exception: If the document store call fails. The message and the
            traceback are printed first, and the original error is attached
            as ``__cause__``.
    """
    try:
        # Passed positionally on purpose.
        # NOTE(review): llama_index's docstore appears to name this
        # parameter ``docs``, so ``nodes=...`` would be rejected as an
        # unexpected keyword -- confirm against the installed version.
        self.docstore.add_documents(all_nodes)
    except Exception as e:
        message = f"An error occurred when adding documents to doc store: {e}"
        print(message)
        traceback.print_exc()
        # Chain explicitly so callers can still inspect the root cause.
        raise Exception(message) from e
def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
bm24_retriever = BM25Retriever.from_defaults(
docstore=self.docstore, similarity_top_k=self.similarity_top_k
)
fusion_retriever = QueryFusionRetriever(
[self.retriever, bm24_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
)
query_bundle = QueryBundle(query_str=query)
retrived_nodes = fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") How can I modify it to use raptor ? |
Beta Was this translation helpful? Give feedback.
-
@dosu you forgot the HierarchicalNodeParser. Should it be used for adding the nodes? Could you also use QueryFusionRetriever to merge RAPTOR + BM25 in the generated code? |
Beta Was this translation helpful? Give feedback.
-
@dosu in the code below define my metadata filter properly: self.raptor_pack = RaptorPack(
documents=[],
embed_model=Settings.embed_model,
llm=self.llm,
vector_store=self.vector_store,
similarity_top_k=self.similarity_top_k,
mode="collapsed",
summary_module=self.summary_module,
)
self.bm25_retriever = BM25Retriever.from_defaults(
docstore=self.docstore, similarity_top_k=self.similarity_top_k
)
self.fusion_retriever = QueryFusionRetriever(
[self.raptor_pack.retriever, self.bm25_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
)
self.node_parser = HierarchicalNodeParser.from_defaults()
try:
self.base_index = load_index_from_storage(self.storage_context)
print("[INFO] Index found at storage")
except ValueError as e:
print("[INFO] No index found at storage")
self.base_index = VectorStoreIndex(
nodes=[],
storage_context=self.storage_context,
store_nodes_override=True,
)
self.base_retriever = self.base_index.as_retriever(
similarity_top_k=self.similarity_top_k,
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
),
)
self.retriever = AutoMergingRetriever(
self.base_retriever, self.storage_context, verbose=verbose
)
self.recency_postprocessor = FixedRecencyPostprocessor(
top_k=recency, date_key="creation_date"
)
self.postprocessor = SentenceTransformerRerank(
model=self.config.get("RAG_RERANK_MODEL"),
top_n=similarity_top_k_reranker,
)
self.response_synthesizer = get_response_synthesizer(
response_mode=ResponseMode.COMPACT, llm=self.llm
) the code fragment here does not take into account my metadata filter: self.fusion_retriever = QueryFusionRetriever(
[self.raptor_pack.retriever, self.bm25_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
) the filter should be something like: filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
), this is being used in the function as: def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
TypeError: QueryFusionRetriever.__init__() got an unexpected keyword argument 'filters' |
Beta Was this translation helpful? Give feedback.
-
this does not work. Can we do it here in the function? def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
@dosu what I meant is implement a metadata filter to filter based on this: MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
), def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
I have the following initialisation for my class:
constructor code:
I also have the following helper functions:
How can I modify it to use raptor ?
@dosu
Beta Was this translation helpful? Give feedback.
All reactions