-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathingest.py
More file actions
25 lines (15 loc) · 726 Bytes
/
ingest.py
File metadata and controls
25 lines (15 loc) · 726 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
def create_vector_store(
    pdf_path,
    db_dir="vectorstore/",
    *,
    chunk_size=1000,
    chunk_overlap=100,
    model_name="all-MiniLM-L6-v2",
):
    """Build a FAISS vector store from a PDF and save it to disk.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping text
    chunks, embedded with a HuggingFace model, indexed with FAISS, and the
    resulting index is written to ``db_dir``.

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``PyPDFLoader``.
        db_dir: Directory the index is saved into; created if missing.
        chunk_size: Maximum chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the
            text splitter.
        model_name: Embedding model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        The in-memory FAISS vector store that was also saved to ``db_dir``.

    Raises:
        ValueError: If the PDF produced no text chunks to index.
    """
    documents = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(documents)

    # Fail early with an actionable message. Building an index from zero
    # chunks is expected to fail inside the vector store with a far less
    # helpful error, and checking here also avoids loading the embedding
    # model for nothing.
    # NOTE(review): confirm the downstream failure mode against the
    # installed langchain_community version.
    if not chunks:
        raise ValueError(
            f"No text chunks could be extracted from {pdf_path!r}; "
            "nothing to index."
        )

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vectordb = FAISS.from_documents(chunks, embedding=embeddings)

    # Ensure the target directory exists before persisting the index.
    os.makedirs(db_dir, exist_ok=True)
    vectordb.save_local(db_dir)
    return vectordb