Skip to content

Commit f179471

Browse files
authored
Merge pull request #4 from ks6088ts-labs/feature/issue-3_qdrant-tool
implement an indexer for adding documents to Qdrant
2 parents 4abaa23 + 100dd43 commit f179471

File tree

13 files changed

+1899
-4
lines changed

13 files changed

+1899
-4
lines changed

.env.template

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,15 @@
11
# Project environment variables
22
PROJECT_NAME=template-langgraph
3+
4+
# Azure OpenAI Service
5+
AZURE_OPENAI_ENDPOINT="https://YOUR_AZURE_OPENAI_ENDPOINT/"
6+
AZURE_OPENAI_API_KEY="YOUR_AZURE_OPENAI_API_KEY"
7+
AZURE_OPENAI_API_VERSION="2024-10-21"
8+
AZURE_OPENAI_MODEL_CHAT="gpt-4o"
9+
AZURE_OPENAI_MODEL_EMBEDDING="text-embedding-3-small"
10+
11+
# CSV Loader Settings
12+
CSV_LOADER_DATA_DIR_PATH="./data"
13+
14+
# Qdrant Settings
15+
QDRANT_URL="http://localhost:6333"

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,5 @@ cython_debug/
162162
# Project
163163
*.env
164164
requirements.txt
165+
assets/
166+
data/

docker-compose.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
services:
2+
qdrant:
3+
image: qdrant/qdrant:v1.15.1
4+
container_name: qdrant
5+
ports:
6+
- "6333:6333" # Dashboard: http://localhost:6333/dashboard
7+
volumes:
8+
- ./assets/qdrant_data:/qdrant/storage

docs/index.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,19 @@
11
# template-langgraph
2+
3+
## Operations
4+
5+
```bash
6+
# Add documents to Qdrant
7+
uv run python -m template_langgraph.tasks.add_documents_to_qdrant
8+
```
9+
10+
## References
11+
12+
### Models
13+
14+
- [AzureOpenAIEmbeddings](https://python.langchain.com/docs/integrations/text_embedding/azureopenai/)
15+
16+
### Tools
17+
18+
- [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
19+
- [Qdrant](https://github.com/qdrant/qdrant)

pyproject.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@ description = "A GitHub template repository for Python"
55
readme = "README.md"
66
requires-python = ">=3.10"
77
dependencies = [
8+
"langchain-community>=0.3.27",
9+
"langchain-openai>=0.3.28",
10+
"langchain-text-splitters>=0.3.9",
11+
"langgraph>=0.6.2",
12+
"openai>=1.98.0",
813
"pydantic-settings>=2.9.1",
914
"python-dotenv>=1.1.0",
15+
"qdrant-client>=1.15.1",
1016
"typer>=0.16.0",
1117
]
1218

@@ -52,3 +58,7 @@ show_missing = true
5258

5359
[tool.ty]
5460
environment = { python-version = "3.10" }
61+
62+
[tool.ty.rules]
63+
unknown-argument = "ignore"
64+
invalid-parameter-default = "ignore"

template_langgraph/models/__init__.py

Whitespace-only changes.
template_langgraph/models/azure_openais.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from functools import lru_cache
2+
3+
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
4+
from pydantic_settings import BaseSettings, SettingsConfigDict
5+
6+
7+
class Settings(BaseSettings):
8+
azure_openai_endpoint: str = "https://<YOUR_AOAI_NAME>.openai.azure.com/"
9+
azure_openai_api_key: str = "<YOUR_API_KEY>"
10+
azure_openai_api_version: str = "2024-10-21"
11+
azure_openai_model_chat: str = "gpt-4o"
12+
azure_openai_model_embedding: str = "text-embedding-3-small"
13+
14+
model_config = SettingsConfigDict(
15+
env_file=".env",
16+
env_ignore_empty=True,
17+
extra="ignore",
18+
)
19+
20+
21+
@lru_cache
def get_azure_openai_settings() -> Settings:
    """Return the process-wide ``Settings`` instance.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    settings object is built on the first call and the same object is
    returned on every later call.
    """
    cached_settings = Settings()
    return cached_settings
24+
25+
26+
class AzureOpenAiWrapper:
    """Hold an Azure OpenAI chat client and embedding client built from one ``Settings``."""

    def __init__(self, settings: Settings | None = None):
        """Create the chat and embedding clients.

        Args:
            settings: Connection settings to use. When ``None`` (the default),
                the cached settings from ``get_azure_openai_settings()`` are used.
        """
        # ``None`` is an accepted value, so the annotation says so explicitly
        # instead of relying on an implicit Optional.
        if settings is None:
            settings = get_azure_openai_settings()

        self.chat_model = AzureChatOpenAI(
            azure_endpoint=settings.azure_openai_endpoint,
            api_key=settings.azure_openai_api_key,
            api_version=settings.azure_openai_api_version,
            azure_deployment=settings.azure_openai_model_chat,
            temperature=0.0,
            streaming=True,
        )
        self.embedding_model = AzureOpenAIEmbeddings(
            azure_endpoint=settings.azure_openai_endpoint,
            api_key=settings.azure_openai_api_key,
            api_version=settings.azure_openai_api_version,
            azure_deployment=settings.azure_openai_model_embedding,
        )

    def create_embedding(self, text: str) -> list[float]:
        """Create an embedding for the given text.

        Args:
            text: The text to embed.

        Returns:
            The result of ``embed_query`` on the embedding client.
        """
        return self.embedding_model.embed_query(text)

template_langgraph/tasks/__init__.py

Whitespace-only changes.
template_langgraph/tasks/add_documents_to_qdrant.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import logging
2+
3+
from qdrant_client.models import PointStruct
4+
5+
from template_langgraph.loggers import get_logger
6+
from template_langgraph.models.azure_openais import AzureOpenAiWrapper
7+
from template_langgraph.tools.csv_loaders import CsvLoaderWrapper
8+
from template_langgraph.tools.qdrants import QdrantClientWrapper
9+
10+
logger = get_logger(__name__)
logger.setLevel(logging.INFO)

# Hardcoded collection name for demonstration purposes.
COLLECTION_NAME = "documents"
# Vector size used for the collection when there is no point to measure.
DEFAULT_VECTOR_SIZE = 1536

if __name__ == "__main__":
    # Load documents from CSV files
    documents = CsvLoaderWrapper().load_csv_docs()
    logger.info(f"Loaded {len(documents)} documents from CSV.")

    logger.info(f"Upserting {len(documents)} documents into Qdrant collection: {COLLECTION_NAME}")

    # Build the Azure OpenAI clients once. Constructing the wrapper inside the
    # loop re-created both clients for every document.
    azure_openai = AzureOpenAiWrapper()

    points = []
    for i, doc in enumerate(documents):
        logger.debug(f"Processing document {i}: {doc.metadata.get('source', 'unknown')}")
        # NOTE(review): this removes every space, which also joins adjacent
        # words in space-delimited text - confirm that is intended.
        content = doc.page_content.replace(" ", "")
        embedding = azure_openai.create_embedding(content)
        points.append(
            PointStruct(
                id=i,  # position in the loaded list doubles as the point id
                vector=embedding,
                payload={
                    "file_name": doc.metadata.get("source", f"doc_{i}"),
                    "content": content,
                },
            )
        )

    # Size the collection from the first embedding when one exists.
    vector_size = len(points[0].vector) if points else DEFAULT_VECTOR_SIZE

    qdrant_client = QdrantClientWrapper()
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vector_size=vector_size,
    )
    logger.info(f"Created Qdrant collection: {COLLECTION_NAME}")

    if points:
        qdrant_client.upsert_points(
            collection_name=COLLECTION_NAME,
            points=points,
        )
        logger.info(f"Upserted {len(points)} points into Qdrant collection: {COLLECTION_NAME}")
    else:
        # Nothing was loaded, so there is nothing to send to Qdrant.
        logger.warning(f"No documents to upsert into Qdrant collection: {COLLECTION_NAME}")

template_langgraph/tools/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)