Skip to content

Commit f02702f

Browse files
committed
Implement backwards compatibility for text/texts arguments to vectorizers
1 parent 9ce254f commit f02702f

File tree

12 files changed

+382
-31
lines changed

12 files changed

+382
-31
lines changed

redisvl/utils/vectorize/base.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from redisvl.extensions.cache.embeddings import EmbeddingsCache
1111
from redisvl.redis.utils import array_to_buffer
1212
from redisvl.schema.fields import VectorDataType
13+
from redisvl.utils.utils import deprecated_argument
1314

1415
try:
1516
from PIL.Image import Image
@@ -67,9 +68,11 @@ def check_dtype(cls, dtype):
6768
)
6869
return dtype
6970

71+
@deprecated_argument("text", "content")
7072
def embed(
7173
self,
72-
content: Any,
74+
content: Any = None,
75+
text: Any = None,
7376
preprocess: Optional[Callable] = None,
7477
as_buffer: bool = False,
7578
skip_cache: bool = False,
@@ -79,6 +82,7 @@ def embed(
7982
8083
Args:
8184
content: The content to convert to a vector embedding
85+
text: The text to convert to a vector embedding (deprecated - use `content` instead)
8286
preprocess: Function to apply to the content before embedding
8387
as_buffer: Return the embedding as a binary buffer instead of a list
8488
skip_cache: Bypass the cache for this request
@@ -91,6 +95,10 @@ def embed(
9195
>>> embedding = text_vectorizer.embed("Hello world")
9296
>>> embedding = image_vectorizer.embed(Image.open("test.png"))
9397
"""
98+
content = content or text
99+
if not content:
100+
raise ValueError("No content provided to embed.")
101+
94102
# Apply preprocessing if provided
95103
if preprocess is not None:
96104
content = preprocess(content)
@@ -128,9 +136,11 @@ def embed(
128136
# Process and return result
129137
return self._process_embedding(embedding, as_buffer, self.dtype)
130138

139+
@deprecated_argument("texts", "contents")
131140
def embed_many(
132141
self,
133-
contents: List[Any],
142+
contents: Optional[List[Any]] = None,
143+
texts: Optional[List[Any]] = None,
134144
preprocess: Optional[Callable] = None,
135145
batch_size: int = 10,
136146
as_buffer: bool = False,
@@ -141,6 +151,7 @@ def embed_many(
141151
142152
Args:
143153
contents: List of content to convert to vector embeddings
154+
texts: List of texts to convert to vector embeddings (deprecated - use `contents` instead)
144155
preprocess: Function to apply to each item before embedding
145156
batch_size: Number of items to process in each API call
146157
as_buffer: Return embeddings as binary buffers instead of lists
@@ -153,6 +164,7 @@ def embed_many(
153164
Examples:
154165
>>> embeddings = vectorizer.embed_many(["Hello", "World"], batch_size=2)
155166
"""
167+
contents = contents or texts
156168
if not contents:
157169
return []
158170

@@ -186,9 +198,11 @@ def embed_many(
186198
# Process and return results
187199
return [self._process_embedding(emb, as_buffer, self.dtype) for emb in results]
188200

201+
@deprecated_argument("text", "content")
189202
async def aembed(
190203
self,
191-
content: Any,
204+
content: Any = None,
205+
text: Any = None,
192206
preprocess: Optional[Callable] = None,
193207
as_buffer: bool = False,
194208
skip_cache: bool = False,
@@ -198,6 +212,7 @@ async def aembed(
198212
199213
Args:
200214
content: The content to convert to a vector embedding
215+
text: The text to convert to a vector embedding (deprecated - use `content` instead)
201216
preprocess: Function to apply to the content before embedding
202217
as_buffer: Return the embedding as a binary buffer instead of a list
203218
skip_cache: Bypass the cache for this request
@@ -209,6 +224,10 @@ async def aembed(
209224
Examples:
210225
>>> embedding = await vectorizer.aembed("Hello world")
211226
"""
227+
content = content or text
228+
if not content:
229+
raise ValueError("No content provided to embed.")
230+
212231
# Apply preprocessing if provided
213232
if preprocess is not None:
214233
content = preprocess(content)
@@ -250,9 +269,11 @@ async def aembed(
250269
# Process and return result
251270
return self._process_embedding(embedding, as_buffer, self.dtype)
252271

272+
@deprecated_argument("texts", "contents")
253273
async def aembed_many(
254274
self,
255-
contents: List[Any],
275+
contents: Optional[List[Any]] = None,
276+
texts: Optional[List[Any]] = None,
256277
preprocess: Optional[Callable] = None,
257278
batch_size: int = 10,
258279
as_buffer: bool = False,
@@ -263,6 +284,7 @@ async def aembed_many(
263284
264285
Args:
265286
contents: List of content to convert to vector embeddings
287+
texts: List of texts to convert to vector embeddings (deprecated - use `contents` instead)
266288
preprocess: Function to apply to each item before embedding
267289
batch_size: Number of items to process in each API call
268290
as_buffer: Return embeddings as binary buffers instead of lists
@@ -275,6 +297,7 @@ async def aembed_many(
275297
Examples:
276298
>>> embeddings = await vectorizer.aembed_many(["Hello", "World"], batch_size=2)
277299
"""
300+
contents = contents or texts
278301
if not contents:
279302
return []
280303

@@ -308,31 +331,45 @@ async def aembed_many(
308331
# Process and return results
309332
return [self._process_embedding(emb, as_buffer, self.dtype) for emb in results]
310333

311-
def _embed(self, content: Any, **kwargs) -> List[float]:
334+
@deprecated_argument("text", "content")
def _embed(self, content: Any = "", text: Any = "", **kwargs) -> List[float]:
    """Generate a vector embedding for a single item.

    This base implementation is a placeholder and always raises.

    Args:
        content: The item to convert to a vector embedding.
        text: Deprecated alias for `content`. It is declared after
            `content` so that a positional argument binds to `content`,
            the same parameter order used by `embed`, `aembed` and
            `_aembed`.
        **kwargs: Additional options; ignored by this placeholder.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
314338

339+
@deprecated_argument("texts", "contents")
def _embed_many(
    self,
    contents: Optional[List[Any]] = None,
    texts: Optional[List[Any]] = None,
    batch_size: int = 10,
    **kwargs,
) -> List[List[float]]:
    """Generate vector embeddings for a batch of items.

    This base implementation is a placeholder and always raises.

    Args:
        contents: Items to convert to vector embeddings.
        texts: Deprecated alias for `contents`.
        batch_size: Number of items per batch; ignored by this placeholder.
        **kwargs: Additional options; ignored by this placeholder.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
320349

321-
async def _aembed(self, content: Any, **kwargs) -> List[float]:
350+
@deprecated_argument("text", "content")
async def _aembed(self, content: Any = "", text: Any = "", **kwargs) -> List[float]:
    """Asynchronously embed a single item by delegating to the sync path.

    Logs a warning and then calls `_embed`.

    Args:
        content: The item to convert to a vector embedding.
        text: Deprecated alias for `content`; used only when `content`
            is falsy.
        **kwargs: Forwarded unchanged to `_embed`.

    Returns:
        Whatever `_embed` returns for the resolved input.
    """
    logger.warning("This vectorizer has no async embed method. Falling back to sync.")
    # Prefer the new argument name; fall back to the deprecated one.
    resolved = content or text
    return self._embed(content=resolved, **kwargs)
327357

358+
@deprecated_argument("texts", "contents")
async def _aembed_many(
    self,
    contents: Optional[List[Any]] = None,
    texts: Optional[List[Any]] = None,
    batch_size: int = 10,
    **kwargs,
) -> List[List[float]]:
    """Asynchronously embed a batch of items by delegating to the sync path.

    Logs a warning and then calls `_embed_many`.

    Args:
        contents: Items to convert to vector embeddings.
        texts: Deprecated alias for `contents`; used only when `contents`
            is falsy.
        batch_size: Forwarded to `_embed_many`.
        **kwargs: Forwarded unchanged to `_embed_many`.

    Returns:
        Whatever `_embed_many` returns for the resolved input.
    """
    logger.warning(
        "This vectorizer has no async embed_many method. Falling back to sync."
    )
    # Prefer the new argument name; fall back to the deprecated one.
    resolved = contents or texts
    return self._embed_many(contents=resolved, batch_size=batch_size, **kwargs)
336373

337374
def _get_from_cache_batch(
338375
self, contents: List[Any], skip_cache: bool

redisvl/utils/vectorize/text/azureopenai.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,17 +214,19 @@ def _set_model_dims(self) -> int:
214214
# fall back (TODO get more specific)
215215
raise ValueError(f"Error setting embedding model dimensions: {str(e)}")
216216

217+
@deprecated_argument("text", "content")
217218
@retry(
218219
wait=wait_random_exponential(min=1, max=60),
219220
stop=stop_after_attempt(6),
220221
retry=retry_if_not_exception_type(TypeError),
221222
)
222-
def _embed(self, content: str, **kwargs) -> List[float]:
223+
def _embed(self, content: str = "", text: str = "", **kwargs) -> List[float]:
223224
"""
224225
Generate a vector embedding for a single text using the AzureOpenAI API.
225226
226227
Args:
227228
content: Text to embed
229+
text: Text to embed (deprecated - use `content` instead)
228230
**kwargs: Additional parameters to pass to the AzureOpenAI API
229231
230232
Returns:
@@ -234,6 +236,7 @@ def _embed(self, content: str, **kwargs) -> List[float]:
234236
TypeError: If content is not a string
235237
ValueError: If embedding fails
236238
"""
239+
content = content or text
237240
if not isinstance(content, str):
238241
raise TypeError("Must pass in a str value to embed.")
239242

@@ -245,19 +248,25 @@ def _embed(self, content: str, **kwargs) -> List[float]:
245248
except Exception as e:
246249
raise ValueError(f"Embedding text failed: {e}")
247250

251+
@deprecated_argument("texts", "contents")
248252
@retry(
249253
wait=wait_random_exponential(min=1, max=60),
250254
stop=stop_after_attempt(6),
251255
retry=retry_if_not_exception_type(TypeError),
252256
)
253257
def _embed_many(
254-
self, contents: List[str], batch_size: int = 10, **kwargs
258+
self,
259+
contents: Optional[List[str]] = None,
260+
texts: Optional[List[str]] = None,
261+
batch_size: int = 10,
262+
**kwargs,
255263
) -> List[List[float]]:
256264
"""
257265
Generate vector embeddings for a batch of texts using the AzureOpenAI API.
258266
259267
Args:
260268
contents: List of texts to embed
269+
texts: List of texts to embed (deprecated - use `contents` instead)
261270
batch_size: Number of texts to process in each API call
262271
**kwargs: Additional parameters to pass to the AzureOpenAI API
263272
@@ -268,6 +277,7 @@ def _embed_many(
268277
TypeError: If contents is not a list of strings
269278
ValueError: If embedding fails
270279
"""
280+
contents = contents or texts
271281
if not isinstance(contents, list):
272282
raise TypeError("Must pass in a list of str values to embed.")
273283
if contents and not isinstance(contents[0], str):
@@ -284,17 +294,19 @@ def _embed_many(
284294
except Exception as e:
285295
raise ValueError(f"Embedding texts failed: {e}")
286296

297+
@deprecated_argument("text", "content")
287298
@retry(
288299
wait=wait_random_exponential(min=1, max=60),
289300
stop=stop_after_attempt(6),
290301
retry=retry_if_not_exception_type(TypeError),
291302
)
292-
async def _aembed(self, content: str, **kwargs) -> List[float]:
303+
async def _aembed(self, content: str = "", text: str = "", **kwargs) -> List[float]:
293304
"""
294305
Asynchronously generate a vector embedding for a single text using the AzureOpenAI API.
295306
296307
Args:
297308
content: Text to embed
309+
text: Text to embed (deprecated - use `content` instead)
298310
**kwargs: Additional parameters to pass to the AzureOpenAI API
299311
300312
Returns:
@@ -304,6 +316,7 @@ async def _aembed(self, content: str, **kwargs) -> List[float]:
304316
TypeError: If content is not a string
305317
ValueError: If embedding fails
306318
"""
319+
content = content or text
307320
if not isinstance(content, str):
308321
raise TypeError("Must pass in a str value to embed.")
309322

@@ -315,19 +328,25 @@ async def _aembed(self, content: str, **kwargs) -> List[float]:
315328
except Exception as e:
316329
raise ValueError(f"Embedding text failed: {e}")
317330

331+
@deprecated_argument("texts", "contents")
318332
@retry(
319333
wait=wait_random_exponential(min=1, max=60),
320334
stop=stop_after_attempt(6),
321335
retry=retry_if_not_exception_type(TypeError),
322336
)
323337
async def _aembed_many(
324-
self, contents: List[str], batch_size: int = 10, **kwargs
338+
self,
339+
contents: Optional[List[str]] = None,
340+
texts: Optional[List[str]] = None,
341+
batch_size: int = 10,
342+
**kwargs,
325343
) -> List[List[float]]:
326344
"""
327345
Asynchronously generate vector embeddings for a batch of texts using the AzureOpenAI API.
328346
329347
Args:
330348
contents: List of texts to embed
349+
texts: List of texts to embed (deprecated - use `contents` instead)
331350
batch_size: Number of texts to process in each API call
332351
**kwargs: Additional parameters to pass to the AzureOpenAI API
333352
@@ -338,6 +357,7 @@ async def _aembed_many(
338357
TypeError: If contents is not a list of strings
339358
ValueError: If embedding fails
340359
"""
360+
contents = contents or texts
341361
if not isinstance(contents, list):
342362
raise TypeError("Must pass in a list of str values to embed.")
343363
if contents and not isinstance(contents[0], str):
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import warnings
2+
from typing import Any, List, Optional, Union
3+
4+
from redisvl.utils.utils import (
5+
deprecated_argument,
6+
deprecated_class,
7+
deprecated_function,
8+
)
9+
from redisvl.utils.vectorize.bedrock import BedrockVectorizer
10+
11+
12+
@deprecated_class(
    name="BedrockTextVectorizer", replacement="Use BedrockVectorizer instead."
)
class BedrockTextVectorizer(BedrockVectorizer):
    """Deprecated, backwards-compatible alias for `BedrockVectorizer`.

    Both overrides accept the legacy `text` / `texts` keyword alongside the
    current `content` / `contents` keyword and forward the resolved value to
    the parent implementation.
    """

    @deprecated_argument("text", "content")
    def embed(
        self, content: Any = "", text: Any = "", **kwargs
    ) -> Union[List[float], bytes]:
        """Embed a single input via the parent `embed`.

        Deprecated: Use `BedrockVectorizer.embed` instead.

        Args:
            content: The input to embed.
            text: Deprecated alias for `content`; used only when `content`
                is falsy.
            **kwargs: Forwarded unchanged to the parent `embed`.

        Returns:
            Whatever the parent `embed` returns.
        """
        # Prefer the new argument name; fall back to the deprecated one.
        return super().embed(content=content or text, **kwargs)

    @deprecated_argument("texts", "contents")
    def embed_many(
        self,
        contents: Optional[List[Any]] = None,
        texts: Optional[List[Any]] = None,
        **kwargs,
    ) -> List[List[float]]:
        """Embed a batch of inputs via the parent `embed_many`.

        Deprecated: Use `BedrockVectorizer.embed_many` instead.

        Args:
            contents: The inputs to embed.
            texts: Deprecated alias for `contents`; used only when
                `contents` is falsy.
            **kwargs: Forwarded unchanged to the parent `embed_many`.

        Returns:
            Whatever the parent `embed_many` returns.
        """
        # Prefer the new argument name; fall back to the deprecated one.
        return super().embed_many(contents=contents or texts, **kwargs)

0 commit comments

Comments
 (0)