|
3 | 3 | """ |
4 | 4 |
|
5 | 5 | from typing import List, Optional |
| 6 | +import os |
6 | 7 |
|
7 | 8 | from langchain.docstore.document import Document |
8 | 9 | from langchain.retrievers import ContextualCompressionRetriever |
@@ -50,6 +51,7 @@ def __init__( |
50 | 51 | self.verbose = ( |
51 | 52 | False if node_config is None else node_config.get("verbose", False) |
52 | 53 | ) |
| 54 | + self.cache_path = node_config.get("cache_path", False) |
53 | 55 |
|
54 | 56 | def execute(self, state: dict) -> dict: |
55 | 57 | """ |
@@ -98,7 +100,24 @@ def execute(self, state: dict) -> dict: |
98 | 100 | ) |
99 | 101 | embeddings = self.embedder_model |
100 | 102 |
|
101 | | - retriever = FAISS.from_documents(chunked_docs, embeddings).as_retriever() |
| 103 | + folder_name = self.node_config.get("cache_path", "cache") |
| 104 | + |
| 105 | + if self.node_config.get("cache_path", False) and not os.path.exists(folder_name): |
| 106 | + index = FAISS.from_documents(chunked_docs, embeddings) |
| 107 | + os.makedirs(folder_name) |
| 108 | + index.save_local(folder_name) |
| 109 | + self.logger.info("--- (indexes saved to cache) ---") |
| 110 | + |
| 111 | + elif self.node_config.get("cache_path", False) and os.path.exists(folder_name): |
| 112 | + index = FAISS.load_local(folder_path=folder_name, |
| 113 | + embeddings=embeddings, |
| 114 | + allow_dangerous_deserialization=True) |
| 115 | + self.logger.info("--- (indexes loaded from cache) ---") |
| 116 | + |
| 117 | + else: |
| 118 | + index = FAISS.from_documents(chunked_docs, embeddings) |
| 119 | + |
| 120 | + retriever = index.as_retriever() |
102 | 121 |
|
103 | 122 | redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings) |
104 | 123 | # similarity_threshold could be set, now k=20 |
|
0 commit comments