Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,2 +1,15 @@
# Project environment variables
PROJECT_NAME=template-langgraph

# Azure OpenAI Service
AZURE_OPENAI_ENDPOINT="https://YOUR_AZURE_OPENAI_ENDPOINT/"
AZURE_OPENAI_API_KEY="YOUR_AZURE_OPENAI_API_KEY"
AZURE_OPENAI_API_VERSION="2024-10-21"
AZURE_OPENAI_MODEL_CHAT="gpt-4o"
AZURE_OPENAI_MODEL_EMBEDDING="text-embedding-3-small"

# CSV Loader Settings
CSV_LOADER_DATA_DIR_PATH="./data"

# Qdrant Settings
QDRANT_URL="http://localhost:6333"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,5 @@ cython_debug/
# Project
*.env
requirements.txt
assets/
data/
8 changes: 8 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
services:
qdrant:
image: qdrant/qdrant:v1.15.1
container_name: qdrant
ports:
- "6333:6333" # Dashboard: http://localhost:6333/dashboard
volumes:
- ./assets/qdrant_data:/qdrant/storage
18 changes: 18 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
# template-langgraph

## Operations

```bash
# Add documents to Qdrant
uv run python -m template_langgraph.tasks.add_documents_to_qdrant
```

## References

### Models

- [AzureOpenAIEmbeddings](https://python.langchain.com/docs/integrations/text_embedding/azureopenai/)

### Tools

- [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
- [Qdrant](https://github.com/qdrant/qdrant)
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@ description = "A GitHub template repository for Python"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"langchain-community>=0.3.27",
"langchain-openai>=0.3.28",
"langchain-text-splitters>=0.3.9",
"langgraph>=0.6.2",
"openai>=1.98.0",
"pydantic-settings>=2.9.1",
"python-dotenv>=1.1.0",
"qdrant-client>=1.15.1",
"typer>=0.16.0",
]

Expand Down Expand Up @@ -52,3 +58,7 @@ show_missing = true

[tool.ty]
environment = { python-version = "3.10" }

[tool.ty.rules]
unknown-argument = "ignore"
invalid-parameter-default = "ignore"
Empty file.
48 changes: 48 additions & 0 deletions template_langgraph/models/azure_openais.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from functools import lru_cache

from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Azure OpenAI connection settings managed by pydantic-settings.

    Each field has a placeholder or default value declared below; the
    ``model_config`` names ``.env`` as the dotenv file to read.
    """

    # Read from ".env"; empty values and unrelated keys are ignored.
    model_config = SettingsConfigDict(env_file=".env", env_ignore_empty=True, extra="ignore")

    # Endpoint, credential and API version passed to both Azure clients.
    azure_openai_endpoint: str = "https://<YOUR_AOAI_NAME>.openai.azure.com/"
    azure_openai_api_key: str = "<YOUR_API_KEY>"
    azure_openai_api_version: str = "2024-10-21"

    # Deployment names for the chat model and the embedding model.
    azure_openai_model_chat: str = "gpt-4o"
    azure_openai_model_embedding: str = "text-embedding-3-small"


@lru_cache
def get_azure_openai_settings() -> Settings:
    """Return the Azure OpenAI settings; ``lru_cache`` reuses the first instance."""
    azure_settings = Settings()
    return azure_settings


class AzureOpenAiWrapper:
    """Hold an Azure OpenAI chat model and embedding model built from one ``Settings``."""

    def __init__(self, settings: Settings | None = None):
        """Build the chat and embedding clients.

        Args:
            settings: Connection settings. When ``None``, the cached value of
                ``get_azure_openai_settings()`` is used.
        """
        if settings is None:
            settings = get_azure_openai_settings()

        # Chat client: deterministic output (temperature 0) with streaming enabled.
        self.chat_model = AzureChatOpenAI(
            azure_endpoint=settings.azure_openai_endpoint,
            api_key=settings.azure_openai_api_key,
            api_version=settings.azure_openai_api_version,
            azure_deployment=settings.azure_openai_model_chat,
            temperature=0.0,
            streaming=True,
        )
        # Embedding client: same endpoint/credentials, different deployment.
        self.embedding_model = AzureOpenAIEmbeddings(
            azure_endpoint=settings.azure_openai_endpoint,
            api_key=settings.azure_openai_api_key,
            api_version=settings.azure_openai_api_version,
            azure_deployment=settings.azure_openai_model_embedding,
        )

    def create_embedding(self, text: str) -> list[float]:
        """Create an embedding for the given text.

        Args:
            text: The text to embed.

        Returns:
            The vector returned by ``embedding_model.embed_query``.
        """
        return self.embedding_model.embed_query(text)
Empty file.
50 changes: 50 additions & 0 deletions template_langgraph/tasks/add_documents_to_qdrant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging

from qdrant_client.models import PointStruct

from template_langgraph.loggers import get_logger
from template_langgraph.models.azure_openais import AzureOpenAiWrapper
from template_langgraph.tools.csv_loaders import CsvLoaderWrapper
from template_langgraph.tools.qdrants import QdrantClientWrapper

logger = get_logger(__name__)
logger.setLevel(logging.INFO)
# Hardcoded collection name for demonstration purposes.
COLLECTION_NAME = "documents"


def main() -> None:
    """Load CSV documents, embed each one, and upsert the vectors into Qdrant.

    The collection ``COLLECTION_NAME`` is created with a vector size taken
    from the first embedding (1536 when there are no documents). When no
    documents are found the upsert step is skipped.
    """
    # Load documents from CSV files
    documents = CsvLoaderWrapper().load_csv_docs()
    logger.info(f"Loaded {len(documents)} documents from CSV.")

    logger.info(f"Upserting {len(documents)} documents into Qdrant collection: {COLLECTION_NAME}")

    # Build the Azure OpenAI clients once; they do not change between documents.
    azure_openai = AzureOpenAiWrapper()

    points = []
    for i, doc in enumerate(documents):
        logger.debug(f"Processing document {i}: {doc.metadata.get('source', 'unknown')}")
        # All spaces are removed before embedding and storage.
        # NOTE(review): presumably the CSV text does not rely on spaces - confirm.
        content = doc.page_content.replace(" ", "")
        embedding = azure_openai.create_embedding(content)
        points.append(
            PointStruct(
                id=i,  # the document's position in the loaded list is its point id
                vector=embedding,
                payload={
                    "file_name": doc.metadata.get("source", f"doc_{i}"),
                    "content": content,
                },
            )
        )

    qdrant_client = QdrantClientWrapper()
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vector_size=len(points[0].vector) if points else 1536,  # default vector size
    )
    logger.info(f"Created Qdrant collection: {COLLECTION_NAME}")

    if not points:
        # Nothing to send; avoid issuing an upsert request with no points.
        logger.warning("No documents were loaded; skipping upsert.")
        return

    qdrant_client.upsert_points(
        collection_name=COLLECTION_NAME,
        points=points,
    )
    logger.info(f"Upserted {len(points)} points into Qdrant collection: {COLLECTION_NAME}")


if __name__ == "__main__":
    main()
Empty file.
47 changes: 47 additions & 0 deletions template_langgraph/tools/csv_loaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os
from functools import lru_cache
from glob import glob

from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_core.documents import Document
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """CSV loader settings managed by pydantic-settings."""

    # Read from ".env"; empty values and unrelated keys are ignored.
    model_config = SettingsConfigDict(env_file=".env", env_ignore_empty=True, extra="ignore")

    # Directory that is searched for CSV files.
    csv_loader_data_dir_path: str = "./data"


@lru_cache
def get_csv_loader_settings() -> Settings:
    """Return the CSV loader settings; ``lru_cache`` reuses the first instance."""
    loader_settings = Settings()
    return loader_settings


class CsvLoaderWrapper:
    """Load every ``*.csv`` file under the configured data directory."""

    def __init__(
        self,
        settings: Settings | None = None,
    ):
        """Store the settings used to locate CSV files.

        Args:
            settings: Loader settings. When ``None``, the cached value of
                ``get_csv_loader_settings()`` is used.
        """
        if settings is None:
            settings = get_csv_loader_settings()
        self.settings = settings

    def load_csv_docs(self) -> list[Document]:
        """Load CSV documents from the specified directory.

        The directory is searched recursively for ``*.csv`` files and each
        file is loaded with ``CSVLoader``.

        Returns:
            The documents of all files, concatenated in sorted path order.
            An empty list when no CSV file is found.
        """
        pattern = os.path.join(self.settings.csv_loader_data_dir_path, "**", "*.csv")
        # glob() does not guarantee an order; sort so the resulting document
        # order (and anything derived from it) is reproducible across runs.
        csv_paths = sorted(glob(pattern, recursive=True))

        docs: list[Document] = []
        for path in csv_paths:
            docs.extend(CSVLoader(file_path=path).load())

        return docs
61 changes: 61 additions & 0 deletions template_langgraph/tools/qdrants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from functools import lru_cache

from pydantic_settings import BaseSettings, SettingsConfigDict
from qdrant_client import QdrantClient
from qdrant_client.http.models import UpdateResult
from qdrant_client.models import Distance, PointStruct, VectorParams


class Settings(BaseSettings):
    """Qdrant connection settings managed by pydantic-settings."""

    # Read from ".env"; empty values and unrelated keys are ignored.
    model_config = SettingsConfigDict(env_file=".env", env_ignore_empty=True, extra="ignore")

    # URL handed to QdrantClient; defaults to a local instance on port 6333.
    qdrant_url: str = "http://localhost:6333"


@lru_cache
def get_qdrant_settings() -> Settings:
    """Return the Qdrant settings; ``lru_cache`` reuses the first instance."""
    qdrant_settings = Settings()
    return qdrant_settings


class QdrantClientWrapper:
    """Thin wrapper around ``QdrantClient`` for creating collections and upserting points."""

    def __init__(
        self,
        settings: Settings | None = None,
    ):
        """Create the underlying ``QdrantClient``.

        Args:
            settings: Qdrant settings. When ``None``, the cached value of
                ``get_qdrant_settings()`` is used.
        """
        if settings is None:
            settings = get_qdrant_settings()
        self.client = QdrantClient(
            url=settings.qdrant_url,
        )

    def create_collection(
        self,
        collection_name: str,
        vector_size: int = 1536,
    ) -> bool:
        """Create a collection in Qdrant.

        Args:
            collection_name: Name of the collection to create.
            vector_size: Dimensionality of the stored vectors.

        Returns:
            The value returned by ``QdrantClient.create_collection``.
        """
        return self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=vector_size,
                distance=Distance.COSINE,  # vectors are compared by cosine distance
            ),
        )

    def upsert_points(
        self,
        collection_name: str,
        points: list[PointStruct],
    ) -> UpdateResult:
        """Upsert points into a Qdrant collection.

        Args:
            collection_name: Name of the target collection.
            points: Points to upsert.

        Returns:
            The ``UpdateResult`` returned by ``QdrantClient.upsert``, which is
            called with ``wait=True``.
        """
        return self.client.upsert(
            collection_name=collection_name,
            points=points,
            wait=True,
        )
Loading