-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrag_crewai.py
More file actions
58 lines (48 loc) · 2 KB
/
rag_crewai.py
File metadata and controls
58 lines (48 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import time
start_time = time.time()
from crewai import Agent, Task, Crew
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import Tool
# --- Ingestion -------------------------------------------------------------
# Read "document.pdf" from the current working directory into LangChain
# document objects.
loader = PyPDFLoader("document.pdf")
documents = loader.load()

# Cut the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=100) so each piece can be embedded and retrieved separately.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
docs = text_splitter.split_documents(documents)

# --- Indexing --------------------------------------------------------------
# Embed every chunk with the BAAI/bge-small-en-v1.5 model and store the
# vectors in a Chroma collection. No persist directory is passed here, so the
# index is rebuilt on every run of the script.
custom_embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=custom_embeddings,
)
# PDF search tool
def search_pdf(query: str, *, k: int = 1) -> str:
    """Return the text of the PDF chunk(s) most similar to *query*.

    Runs a similarity search against the module-level ``vectorstore``.

    Args:
        query: Free-text search string supplied by the caller (the agent
            passes its tool input here).
        k: How many top-ranked chunks to retrieve. Defaults to 1, which
            returns exactly the single best chunk, as before. Keyword-only so
            a tool runner that passes one positional argument is unaffected.

    Returns:
        The ``page_content`` of the retrieved chunks, separated by a blank
        line when there is more than one, or ``"No results found."`` when
        the search returns nothing.
    """
    # Named `matches` rather than `docs` so the local does not shadow the
    # module-level list of split chunks.
    matches = vectorstore.similarity_search(query, k=k)
    if not matches:
        return "No results found."
    return "\n\n".join(match.page_content for match in matches)
# Wrap the search function as a LangChain Tool so it can be handed to an
# agent; the name and description are the text the agent sees for this tool.
pdf_tool = Tool(
    name="PDF Search",
    description="Used to search within a PDF document.",
    func=search_pdf,
)
# The single agent in this crew: it answers questions about the PDF and is
# given only the PDF search tool. Delegation is switched off.
data_analyst = Agent(
    role="Data Analyst",
    goal="Analyze the information in the PDF document and extract key insights",
    backstory=(
        "You are an experienced data analyst specialized in analyzing "
        "complex data and extracting meaningful information. "
        "You have the ability to quickly and accurately process "
        "information from PDF documents."
    ),
    tools=[pdf_tool],
    allow_delegation=False,
    verbose=True,
)
# The one task given to the analyst: a concrete question about the PDF.
# NOTE(review): `expected_output` holds answering instructions rather than a
# description of the result, and has no space after "end." -- both are kept
# exactly as written so the prompt sent to the model does not change.
test_task = Task(
    agent=data_analyst,
    description=(
        "'document.pdf' file analysis. "
        "How many bastions and towers does Alanya Castle consist of?"
    ),
    expected_output=(
        "Use the following context pieces to answer the question at the end."
        "If you don't know the answer, just say you don't know, "
        "don't try to make up an answer. Give answers in great detail."
    ),
)
# Assemble the crew: one agent, one task.
crew = Crew(
    agents=[data_analyst],
    tasks=[test_task],
    # Detailed output. The boolean form is used instead of the legacy integer
    # level `2`: newer CrewAI releases type `verbose` as a bool and fail
    # validation on 2, while releases that still use integer levels treat
    # True as the detailed level.
    verbose=True,
)
# Execute the crew's task and show the final answer.
result = crew.kickoff()
print(result)

# Report wall-clock time since `start_time`, which was captured at the top of
# the script, so the figure includes library imports and index construction.
elapsed_time = time.time() - start_time
print(f"\nTotal execution time: {elapsed_time:.2f} seconds")