@@ -189,34 +189,39 @@ def build_duckdb_and_summary_cards(
     return summary_cards


-def reset_faiss_index(index_path: Path):
-    if index_path.exists():
-        print("Reseting previous index...")
-        shutil.rmtree(index_path)
-
-
 def embed_and_index_all_docs(
     data_dir: Path = DATA,
     db_path: Path = DATA / "generated_db" / "csv_excel_to_db.duckdb",
     index_path: Path = DATA / "generated_db" / "faiss_index",
+    load_data: bool = False,
 ):
-    # delete old FAISS index if it exists
-    reset_faiss_index(index_path)
-
     # load embeddings and vector store
     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

-    # LOAD AND SPLIT TEXT DOCS
-    text_chunks = load_and_split_text_docs(data_dir)
-    # LOAD IMAGES (OCR converts image -> text)
-    image_text_docs = load_image_docs_as_text(data_dir)
-    # LOAD AND SPLIT CSV/EXCEL DOCS
-    summary_cards = build_duckdb_and_summary_cards(data_dir, db_path)
-
-    # vector_store.add_documents(text_chunks + image_text_docs + summary_cards)
-    vector_store = FAISS.from_documents(
-        text_chunks + image_text_docs + summary_cards, embeddings
-    )
-    vector_store.save_local(index_path)
+    if not load_data and index_path.exists():
+        # reuse the existing FAISS index instead of rebuilding it
+        vector_store = FAISS.load_local(
+            index_path, embeddings, allow_dangerous_deserialization=True
+        )
+        print("Loaded existing FAISS index and database.")
+    else:
+        # delete the old FAISS index if it exists
+        if index_path.exists():
+            print("Resetting previous index...")
+            shutil.rmtree(index_path)
+
+        # LOAD AND SPLIT TEXT DOCS
+        text_chunks = load_and_split_text_docs(data_dir)
+        # LOAD IMAGES (OCR converts image -> text)
+        image_text_docs = load_image_docs_as_text(data_dir)
+        # LOAD AND SPLIT CSV/EXCEL DOCS
+        summary_cards = build_duckdb_and_summary_cards(data_dir, db_path)
+
+        # vector_store.add_documents(text_chunks + image_text_docs + summary_cards)
+        vector_store = FAISS.from_documents(
+            text_chunks + image_text_docs + summary_cards, embeddings
+        )
+        vector_store.save_local(index_path)
+        print("Built and saved new FAISS index.")

     return embeddings, vector_store
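
With this change, embed_and_index_all_docs reuses a previously saved FAISS index by default and only re-ingests the source documents when no index exists on disk or when the caller asks for a rebuild. A minimal usage sketch follows; the calls mirror the signature in the diff, while the example query string and k value are purely illustrative:

    # Reuse the cached index when DATA/generated_db/faiss_index already exists:
    embeddings, vector_store = embed_and_index_all_docs()

    # Force a full re-ingest: load_data=True bypasses the cached-index branch,
    # wipes the old index directory, and re-embeds the text, image (OCR), and
    # CSV/Excel summary-card documents before saving a fresh index.
    embeddings, vector_store = embed_and_index_all_docs(load_data=True)

    # The returned store can then be queried as usual, for example:
    # hits = vector_store.similarity_search("example question about the data", k=4)

Note that FAISS.load_local is called with allow_dangerous_deserialization=True: LangChain requires this opt-in because the saved docstore is pickled, so the flag is only appropriate when the index files were produced by this application itself.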