Skip to content

Commit 0072e2a

Browse files
committed
update redis to use newer vector store provider
1 parent 3174c9a commit 0072e2a

File tree

7 files changed

+46
-131
lines changed

7 files changed

+46
-131
lines changed

.env

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@ EMBEDDING_LENGTH=768
2020
# === Redis ===
2121
REDIS_URL=redis://localhost:6379
2222
REDIS_INDEX=docs
23-
REDIS_SCHEMA=redis_schema.yaml
2423

2524
# === Elasticsearch ===
2625
ELASTIC_URL=http://localhost:9200

Containerfile

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ COPY vector_db ./vector_db
1818
COPY loaders ./loaders
1919
COPY embed_documents.py .
2020
COPY config.py .
21-
COPY redis_schema.yaml .
2221
COPY .env .
2322

2423
RUN chown -R 1001:0 .

config.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,8 +114,7 @@ def _init_db_provider(db_type: str) -> DBProvider:
114114
if db_type == "REDIS":
115115
url = get("REDIS_URL")
116116
index = os.getenv("REDIS_INDEX", "docs")
117-
schema = os.getenv("REDIS_SCHEMA", "redis_schema.yaml")
118-
return RedisProvider(embedding_model, url, index, schema)
117+
return RedisProvider(embedding_model, url, index)
119118

120119
elif db_type == "ELASTIC":
121120
url = get("ELASTIC_URL")

redis_schema.yaml

Lines changed: 0 additions & 53 deletions
This file was deleted.

requirements.in

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,16 @@
11
beautifulsoup4
22
hf_xet
3+
langchain
34
langchain-community
45
langchain-elasticsearch
56
langchain-huggingface
67
langchain-postgres
78
langchain-qdrant
9+
langchain-redis
810
langchain-sqlserver
9-
langchain
10-
psycopg[binary]
1111
pyodbc
1212
pypdf
1313
python-dotenv
1414
qdrant-client
15-
redis < 6.0.0
1615
sentence-transformers
1716
unstructured[md]

requirements.txt

Lines changed: 29 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,8 @@ click==8.2.1
4646
# via
4747
# nltk
4848
# python-oxmsg
49+
coloredlogs==15.0.1
50+
# via redisvl
4951
cryptography==45.0.3
5052
# via
5153
# azure-identity
@@ -108,6 +110,8 @@ huggingface-hub==0.32.4
108110
# sentence-transformers
109111
# tokenizers
110112
# transformers
113+
humanfriendly==10.0
114+
# via coloredlogs
111115
hyperframe==6.1.0
112116
# via h2
113117
idna==3.10
@@ -117,7 +121,9 @@ idna==3.10
117121
# requests
118122
# yarl
119123
jinja2==3.1.6
120-
# via torch
124+
# via
125+
# langchain-redis
126+
# torch
121127
joblib==1.5.1
122128
# via
123129
# nltk
@@ -140,6 +146,7 @@ langchain-core==0.3.63
140146
# langchain-huggingface
141147
# langchain-postgres
142148
# langchain-qdrant
149+
# langchain-redis
143150
# langchain-sqlserver
144151
# langchain-text-splitters
145152
langchain-elasticsearch==0.3.2
@@ -150,6 +157,8 @@ langchain-postgres==0.0.14
150157
# via -r requirements.in
151158
langchain-qdrant==0.2.0
152159
# via -r requirements.in
160+
langchain-redis==0.2.0
161+
# via -r requirements.in
153162
langchain-sqlserver==0.1.2
154163
# via -r requirements.in
155164
langchain-text-splitters==0.3.8
@@ -169,6 +178,8 @@ markupsafe==3.0.2
169178
# via jinja2
170179
marshmallow==3.26.1
171180
# via dataclasses-json
181+
ml-dtypes==0.4.1
182+
# via redisvl
172183
mpmath==1.3.0
173184
# via sympy
174185
msal==1.32.3
@@ -194,9 +205,12 @@ numpy==1.26.4
194205
# elasticsearch
195206
# langchain-community
196207
# langchain-postgres
208+
# langchain-redis
197209
# langchain-sqlserver
210+
# ml-dtypes
198211
# pgvector
199212
# qdrant-client
213+
# redisvl
200214
# scikit-learn
201215
# scipy
202216
# transformers
@@ -263,12 +277,8 @@ protobuf==6.31.1
263277
# via qdrant-client
264278
psutil==7.0.0
265279
# via unstructured
266-
psycopg[binary]==3.2.9
267-
# via
268-
# -r requirements.in
269-
# langchain-postgres
270-
psycopg-binary==3.2.9
271-
# via psycopg
280+
psycopg==3.2.9
281+
# via langchain-postgres
272282
psycopg-pool==3.2.6
273283
# via langchain-postgres
274284
pycparser==2.22
@@ -281,6 +291,7 @@ pydantic==2.11.5
281291
# langsmith
282292
# pydantic-settings
283293
# qdrant-client
294+
# redisvl
284295
# unstructured-client
285296
pydantic-core==2.33.2
286297
# via pydantic
@@ -310,12 +321,17 @@ python-magic==0.4.27
310321
# via unstructured
311322
python-oxmsg==0.0.2
312323
# via unstructured
324+
python-ulid==3.0.0
325+
# via
326+
# langchain-redis
327+
# redisvl
313328
pyyaml==6.0.2
314329
# via
315330
# huggingface-hub
316331
# langchain
317332
# langchain-community
318333
# langchain-core
334+
# redisvl
319335
# transformers
320336
qdrant-client==1.14.2
321337
# via
@@ -324,7 +340,9 @@ qdrant-client==1.14.2
324340
rapidfuzz==3.13.0
325341
# via unstructured
326342
redis==5.2.1
327-
# via -r requirements.in
343+
# via redisvl
344+
redisvl==0.4.1
345+
# via langchain-redis
328346
regex==2024.11.6
329347
# via
330348
# nltk
@@ -376,10 +394,13 @@ sqlalchemy==2.0.41
376394
# langchain-sqlserver
377395
sympy==1.14.0
378396
# via torch
397+
tabulate==0.9.0
398+
# via redisvl
379399
tenacity==9.1.2
380400
# via
381401
# langchain-community
382402
# langchain-core
403+
# redisvl
383404
threadpoolctl==3.6.0
384405
# via scikit-learn
385406
tokenizers==0.21.1

vector_db/redis_provider.py

Lines changed: 14 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
import logging
2-
from typing import List, Optional
2+
from typing import List
33

4-
import redis
5-
from langchain_community.vectorstores.redis import Redis as RedisVectorStore
64
from langchain_core.documents import Document
5+
from langchain_redis import RedisVectorStore
76

87
from vector_db.db_provider import DBProvider
98

@@ -14,81 +13,44 @@ class RedisProvider(DBProvider):
1413
"""
1514
Redis-backed vector DB provider using RediSearch and LangChain's Redis integration.
1615
17-
This provider connects to a Redis instance, checks if the specified index exists,
18-
and either loads from it or creates a new index on first insert. Vectors are stored
19-
using the RediSearch module with configurable schema.
20-
2116
Attributes:
22-
redis_client (redis.Redis): Raw Redis client for low-level access.
23-
db (Optional[RedisVectorStore]): LangChain vector store, lazily created on first add.
17+
db (RedisVectorStore): LangChain vector store
2418
2519
Args:
2620
embedding_model (str): Name of the embedding model to use for text chunks.
2721
url (str): Redis connection string (e.g., "redis://localhost:6379").
2822
index (str): RediSearch index name to use for vector storage.
29-
schema (str): Path to schema file where the RediSearch index definition is written.
3023
3124
Example:
3225
>>> from vector_db.redis_provider import RedisProvider
3326
>>> provider = RedisProvider(
3427
... embedding_model="BAAI/bge-large-en-v1.5",
3528
... url="redis://localhost:6379",
36-
... index="validated_docs",
37-
... schema="redis_schema.yaml"
29+
... index="validated_docs"
3830
... )
3931
>>> provider.add_documents(docs)
4032
"""
4133

42-
def __init__(self, embedding_model: str, url: str, index: str, schema: str):
34+
def __init__(self, embedding_model: str, url: str, index: str):
4335
"""
4436
Initialize a Redis-backed vector store provider.
4537
4638
Args:
4739
embedding_model (str): HuggingFace model for embeddings.
4840
url (str): Redis connection string.
4941
index (str): Name of the RediSearch index to use.
50-
schema (str): Path to write RediSearch schema YAML (used on creation).
5142
"""
5243
super().__init__(embedding_model)
53-
self.url = url
54-
self.index = index
55-
self.schema = schema
56-
self.db: Optional[RedisVectorStore] = None
57-
58-
try:
59-
self.redis_client = redis.from_url(self.url)
60-
self.redis_client.ping()
61-
logger.info("Connected to Redis instance at %s", self.url)
62-
except Exception:
63-
logger.exception("Failed to connect to Redis at %s", self.url)
64-
raise
6544

66-
if self._index_exists():
67-
logger.info("Using existing Redis index: %s", self.index)
68-
self.db = RedisVectorStore.from_existing_index(
69-
embedding=self.embeddings,
70-
redis_url=self.url,
71-
index_name=self.index,
72-
schema=self.schema,
73-
)
74-
else:
75-
logger.info(
76-
"Redis index %s does not exist. Will create on first add_documents call.",
77-
self.index,
78-
)
45+
self.db = RedisVectorStore(
46+
index_name=index, embeddings=self.embeddings, redis_url=url
47+
)
7948

80-
def _index_exists(self) -> bool:
81-
"""
82-
Check whether the Redis index already exists.
83-
84-
Returns:
85-
bool: True if the index exists, False otherwise.
86-
"""
87-
try:
88-
self.redis_client.ft(self.index).info()
89-
return True
90-
except Exception:
91-
return False
49+
logger.info(
50+
"Connected to Redis at %s (index: %s)",
51+
url,
52+
index,
53+
)
9254

9355
def add_documents(self, docs: List[Document]) -> None:
9456
"""
@@ -97,15 +59,4 @@ def add_documents(self, docs: List[Document]) -> None:
9759
Args:
9860
docs (List[Document]): LangChain document chunks to embed and store.
9961
"""
100-
if self.db is None:
101-
logger.info("Creating new Redis index: %s", self.index)
102-
self.db = RedisVectorStore.from_documents(
103-
documents=docs,
104-
embedding=self.embeddings,
105-
redis_url=self.url,
106-
index_name=self.index,
107-
)
108-
logger.info("Writing Redis schema to file: %s", self.schema)
109-
self.db.write_schema(self.schema)
110-
else:
111-
self.db.add_documents(docs)
62+
self.db.add_documents(docs)

0 commit comments

Comments
 (0)