# LlamaIndex Guide

Complete guide to LlamaIndex for building advanced generative AI projects with data indexing and retrieval.

Contents:
- Introduction to LlamaIndex
- Core Concepts
- Getting Started
- Loading Data
- Indexing
- Querying
- Retrievers
- Query Engines
- Chat Engines
- Advanced Features
- Real-World Projects
- Best Practices
- Resources
## Introduction to LlamaIndex

LlamaIndex (formerly GPT Index) is a data framework for LLM applications. It provides:
- Data Indexing: Efficient indexing of documents
- Query Interface: Natural language querying
- Retrieval: Smart document retrieval
- Integration: Works with various LLMs and data sources
Key strengths:

- Efficient Indexing: Optimized for large document collections
- Flexible Querying: Multiple query strategies
- Data Connectors: Easy integration with various data sources
- Production Ready: Built for scalable applications
## Getting Started

### Installation

The examples in this guide use the classic `llama_index` (pre-0.10) package layout.

```bash
pip install llama-index
# Or with specific integrations
pip install "llama-index[openai]"
pip install "llama-index[all]"
```

## Core Concepts

- Documents: Your data (text, PDFs, etc.)
- Nodes: Chunks of documents
- Index: Data structure for efficient retrieval
- Retriever: Finds relevant nodes
- Query Engine: Answers questions using retrieved context
- Chat Engine: Conversational interface
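To make the Document → Node step concrete, here is a minimal sketch using the pre-0.10 API (the sample text and chunk sizes are illustrative):

```python
from llama_index import Document
from llama_index.node_parser import SimpleNodeParser

# Wrap raw text in a Document (readers normally do this for you)
doc = Document(text="LlamaIndex turns your data into queryable indexes. " * 40)

# Chunk the document into nodes, the unit of retrieval
parser = SimpleNodeParser.from_defaults(chunk_size=128, chunk_overlap=16)
nodes = parser.get_nodes_from_documents([doc])
print(f"Created {len(nodes)} nodes")
```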
Putting these pieces together:

```python
from llama_index import VectorStoreIndex, SimpleDirectoryReader
# Load documents
documents = SimpleDirectoryReader("./data").load_data()
# Create index
index = VectorStoreIndex.from_documents(documents)
# Create query engine
query_engine = index.as_query_engine()
# Query
response = query_engine.query("What is machine learning?")
print(response)
```

## Loading Data

### From a Directory

```python
from llama_index import SimpleDirectoryReader
# Load all files from directory
documents = SimpleDirectoryReader("./documents").load_data()# Load specific files
documents = SimpleDirectoryReader(
input_files=["./doc1.pdf", "./doc2.txt"]
).load_data()from llama_index import download_loader
SimpleWebPageReader = download_loader("SimpleWebPageReader")
loader = SimpleWebPageReader()
documents = loader.load_data(urls=["https://example.com/article"])
```

### Databases

```python
from llama_index import download_loader
DatabaseReader = download_loader("DatabaseReader")
loader = DatabaseReader(uri="sqlite:///database.db")
documents = loader.load_data(query="SELECT * FROM articles")
```

### Notion

```python
from llama_index import download_loader
NotionPageReader = download_loader("NotionPageReader")
loader = NotionPageReader(integration_token="your_token")
documents = loader.load_data(page_ids=["page-id-1", "page-id-2"])
```

## Indexing

### Vector Store Index

```python
from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage
# Create index
index = VectorStoreIndex.from_documents(documents)
# Save index
index.storage_context.persist(persist_dir="./storage")
# Load index
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
```

### Tree Index

```python
from llama_index import TreeIndex
# Hierarchical index
index = TreeIndex.from_documents(documents)
```

### Keyword Table Index

```python
from llama_index import KeywordTableIndex
# Keyword-based index
index = KeywordTableIndex.from_documents(documents)
```

### Composable Graph

```python
from llama_index import ComposableGraph
# Combine multiple indexes
vector_index = VectorStoreIndex.from_documents(documents)
tree_index = TreeIndex.from_documents(documents)
graph = ComposableGraph.from_indices(
    TreeIndex,  # root index class that composes the children
    [vector_index, tree_index],
    index_summaries=["semantic search index", "hierarchical tree index"],
)
```
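A composed graph is queried like any other index. A minimal sketch, assuming the graph above was built with index summaries:

```python
# Query across the composed indexes via the root index
graph_query_engine = graph.as_query_engine()
response = graph_query_engine.query("Summarize the main themes")
print(response)
```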
response = query_engine.query("What is the main topic?")
print(response)
print(response.source_nodes)  # Source documents
```

### Streaming Responses

```python
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Explain machine learning")
for token in response.response_gen:
    print(token, end="")
```

### Custom Query Engine

```python
from llama_index import get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever
# Custom retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5
)
# Custom response synthesizer
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize"
)
# Create query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer
)
```

## Retrievers

### Vector Retriever

```python
from llama_index.retrievers import VectorIndexRetriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5
)
nodes = retriever.retrieve("query")
```
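Each result is a NodeWithScore, so you can inspect what the retriever found:

```python
# Inspect retrieved chunks and their similarity scores
for node_with_score in nodes:
    print(f"score={node_with_score.score:.3f}")
    print(node_with_score.node.get_content()[:200])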
```

### BM25 Retriever

```python
from llama_index.retrievers import BM25Retriever

retriever = BM25Retriever.from_defaults(
    index=index,
    similarity_top_k=5
)
```

### Hybrid Retriever

```python
from llama_index.retrievers import VectorIndexRetriever, BM25Retriever
from llama_index.query_engine import RetrieverQueryEngine
# Combine vector and keyword search
vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=3)
bm25_retriever = BM25Retriever.from_defaults(index=index, similarity_top_k=3)
# Hybrid
from llama_index.retrievers import BaseRetriever
class HybridRetriever(BaseRetriever):
    def __init__(self, vector_retriever, bm25_retriever):
        self.vector_retriever = vector_retriever
        self.bm25_retriever = bm25_retriever
        super().__init__()

    def _retrieve(self, query_bundle):
        vector_nodes = self.vector_retriever.retrieve(query_bundle)
        bm25_nodes = self.bm25_retriever.retrieve(query_bundle)
        # Combine and deduplicate by node ID
        unique_nodes = {n.node.node_id: n for n in vector_nodes + bm25_nodes}
        return list(unique_nodes.values())
```
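A hedged sketch of putting the hybrid retriever to work, reusing RetrieverQueryEngine from earlier:

```python
# Wire the hybrid retriever into a query engine
hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)
query_engine = RetrieverQueryEngine.from_args(retriever=hybrid_retriever)
response = query_engine.query("What is the main topic?")
```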
response = query_engine.query("question")from llama_index.query_engine import RouterQueryEngine
from llama_index.selectors import LLMSingleSelector
from llama_index.tools import QueryEngineTool
# Route to different indexes based on query
query_engine = RouterQueryEngine.from_defaults(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        QueryEngineTool.from_defaults(
            query_engine=vector_query_engine,
            description="Useful for semantic search",
        ),
        QueryEngineTool.from_defaults(
            query_engine=keyword_query_engine,
            description="Useful for keyword search",
        ),
    ],
)
```

### Sub-Question Query Engine

```python
from llama_index.query_engine import SubQuestionQueryEngine
# Break complex query into sub-questions
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[...],  # list of QueryEngineTool objects
    service_context=service_context
)
```

## Chat Engines

### Basic Chat

```python
chat_engine = index.as_chat_engine()
response = chat_engine.chat("Hello")
response = chat_engine.chat("What did I just say?") # Remembers contextfrom llama_index.chat_engine import CondenseQuestionChatEngine
### Condense Question Chat Engine

```python
from llama_index.chat_engine import CondenseQuestionChatEngine

chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=query_engine,
    verbose=True
)
```

### Context Chat Engine

```python
from llama_index.chat_engine import ContextChatEngine
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    service_context=service_context
)
```

## Advanced Features

### Custom LLMs

```python
from llama_index.llms import OpenAI, HuggingFaceLLM
# OpenAI
llm = OpenAI(temperature=0.7, model="gpt-3.5-turbo")
# Hugging Face
llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-2-7b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf"
)
# Use in service context
from llama_index import ServiceContext
service_context = ServiceContext.from_defaults(llm=llm)
```
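Instead of threading the ServiceContext through every call, recent pre-0.10 releases let you set it globally; a hedged sketch:

```python
from llama_index import set_global_service_context

# Subsequent indexes and query engines use this context by default
set_global_service_context(service_context)
```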
### Custom Embeddings

```python
from llama_index.embeddings import OpenAIEmbedding, HuggingFaceEmbedding

# OpenAI
embeddings = OpenAIEmbedding()
# Hugging Face
embeddings = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Use in service context
service_context = ServiceContext.from_defaults(embed_model=embeddings)
```

### Node Postprocessors

```python
from llama_index.postprocessor import SimilarityPostprocessor
# Filter by similarity
postprocessor = SimilarityPostprocessor(similarity_cutoff=0.7)
query_engine = index.as_query_engine(
    node_postprocessors=[postprocessor]
)
```
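Other postprocessors can be stacked the same way; for example, a hedged sketch with KeywordNodePostprocessor (the keyword values are made up):

```python
from llama_index.postprocessor import KeywordNodePostprocessor

# Keep only nodes mentioning "neural", drop any mentioning "deprecated"
keyword_filter = KeywordNodePostprocessor(
    required_keywords=["neural"],
    exclude_keywords=["deprecated"],
)
query_engine = index.as_query_engine(
    node_postprocessors=[postprocessor, keyword_filter]
)
```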
### Response Modes

```python
# Different response synthesis modes
query_engine = index.as_query_engine(
    response_mode="refine",  # or "compact", "tree_summarize", "accumulate"
    similarity_top_k=5
)
```

## Real-World Projects

### Project 1: Document Q&A System

```python
from llama_index import VectorStoreIndex, SimpleDirectoryReader
# Load documents
documents = SimpleDirectoryReader("./documents").load_data()
# Create index
index = VectorStoreIndex.from_documents(documents)
# Create query engine
query_engine = index.as_query_engine()
# Interactive Q&A
while True:
    question = input("Ask a question (or 'quit'): ")
    if question.lower() == "quit":
        break
    response = query_engine.query(question)
    print(f"Answer: {response}")
    print(f"Sources: {len(response.source_nodes)} documents")
```

### Project 2: Code Assistant

```python
from llama_index import download_loader
CodeReader = download_loader("CodeReader")
loader = CodeReader()
documents = loader.load_data(file_path="./codebase/")
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("How does the authentication work?")from llama_index import SimpleDirectoryReader, VectorStoreIndex
# Load research papers
documents = SimpleDirectoryReader("./papers/", recursive=True).load_data()
# Create index with metadata
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True
)
# Query with top-k retrieval and tree summarization
query_engine = index.as_query_engine(
    similarity_top_k=5,
    response_mode="tree_summarize"
)
response = query_engine.query("What are the key findings across all papers?")- Chunking: Optimize chunk size for your documents
- Indexing: Choose appropriate index type
- Retrieval: Use hybrid retrieval for better results
- Caching: Cache queries when possible
- Metadata: Add metadata for filtering
- Evaluation: Test with diverse queries
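A hedged sketch of applying the chunking and metadata tips (chunk sizes and metadata keys here are illustrative, not recommendations):

```python
from llama_index import Document, ServiceContext, VectorStoreIndex

# Tune chunking globally via the service context
service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=50)

# Attach metadata so results can be filtered or traced back to a source
doc = Document(
    text="...",
    metadata={"source": "report.pdf", "year": 2023},
)
index = VectorStoreIndex.from_documents([doc], service_context=service_context)
```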
Remember: LlamaIndex excels at indexing and querying large document collections. Use it for document-based AI applications!