Skip to content

Commit ec463cc

Browse files
committed
fix: support multiple prefixes in SearchIndex.from_existing() (#258)
Fixed bug in convert_index_info_to_schema() where only the first prefix was captured from Redis indices with multiple prefixes.

Updated code to handle the Union[str, List[str]] prefix type by normalizing to the first prefix when constructing Redis keys. This maintains backward compatibility while supporting multiple prefixes in the schema definition.

- Added normalization in prefix property (index.py)
- Normalized prefix in _create_key method (storage.py)
- Updated key() method to use normalized prefix property

Maintains backward compatibility by converting single-element prefix lists to strings when loading from Redis. This ensures schema comparisons work correctly when comparing existing indices with new configurations.

- Updated convert_index_info_to_schema to normalize single prefixes
- Updated unit tests to reflect normalization behavior
- Fixes schema comparison issues in semantic router and cache extensions
- Fixed vector field parsing to support both Redis 6.2.x and 7.x+ formats:

  Redis 6.2.x format: [..., "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...]
    Position 6: algorithm value (FLAT/HNSW)
    Position 7: param count
    Position 8+: key-value pairs

  Redis 7.x+ format: [..., "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...]
    Position 6+: all key-value pairs

The parser now detects the format by checking if position 6 is an algorithm value (FLAT/HNSW) vs a key. For the old format, it stores the algorithm and starts parsing key-value pairs from position 8.

Also added fallback logic to scan raw attrs if dims is not found through normal parsing, and better handling of the "type" field which may be named "data_type", "datatype", or just "type". Validates that dims is present and raises a clear error if missing.
1 parent 82ddb58 commit ec463cc

File tree

6 files changed

+240
-10
lines changed

6 files changed

+240
-10
lines changed

redisvl/index/index.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,10 @@ def name(self) -> str:
245245
@property
246246
def prefix(self) -> str:
247247
"""The optional key prefix that comes before a unique key value in
248-
forming a Redis key."""
249-
return self.schema.index.prefix
248+
forming a Redis key. If multiple prefixes are configured, returns the
249+
first one."""
250+
prefix = self.schema.index.prefix
251+
return prefix[0] if isinstance(prefix, list) else prefix
250252

251253
@property
252254
def key_separator(self) -> str:
@@ -329,7 +331,7 @@ def key(self, id: str) -> str:
329331
"""
330332
return self._storage._key(
331333
id=id,
332-
prefix=self.schema.index.prefix,
334+
prefix=self.prefix,
333335
key_separator=self.schema.index.key_separator,
334336
)
335337

redisvl/index/storage.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,13 @@ def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> st
114114
except KeyError:
115115
raise ValueError(f"Key field {id_field} not found in record {obj}")
116116

117+
# Normalize prefix: use first prefix if multiple are configured
118+
prefix = self.index_schema.index.prefix
119+
normalized_prefix = prefix[0] if isinstance(prefix, list) else prefix
120+
117121
return self._key(
118122
key_value,
119-
prefix=self.index_schema.index.prefix,
123+
prefix=normalized_prefix,
120124
key_separator=self.index_schema.index.key_separator,
121125
)
122126

redisvl/redis/connection.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,31 +133,66 @@ def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]:
133133
Dict[str, Any]: Schema dictionary.
134134
"""
135135
index_name = index_info["index_name"]
136-
prefixes = index_info["index_definition"][3][0]
136+
prefixes = index_info["index_definition"][3]
137+
# Normalize single-element prefix lists to string for backward compatibility
138+
if isinstance(prefixes, list) and len(prefixes) == 1:
139+
prefixes = prefixes[0]
137140
storage_type = index_info["index_definition"][1].lower()
138141

139142
index_fields = index_info["attributes"]
140143

141144
def parse_vector_attrs(attrs):
142145
# Parse vector attributes from Redis FT.INFO output
143-
# Attributes start at position 6 as key-value pairs
146+
# Format varies between Redis versions:
147+
# - Redis 6.2.x: [... "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...]
148+
# Position 6: algorithm value (e.g., "FLAT" or "HNSW")
149+
# Position 7: param count
150+
# Position 8+: key-value pairs
151+
# - Redis 7.x+: [... "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...]
152+
# Position 6+: all key-value pairs
153+
144154
vector_attrs = {}
155+
start_pos = 6
156+
157+
# Detect format: if position 6 looks like an algorithm value (not a key),
158+
# we're dealing with the older format
159+
if len(attrs) > 6:
160+
pos6_str = str(attrs[6]).upper()
161+
# Check if position 6 is an algorithm value (FLAT, HNSW) vs a key (ALGORITHM, TYPE, DIM)
162+
if pos6_str in ("FLAT", "HNSW"):
163+
# Old format (Redis 6.2.x): position 6 is algorithm value, position 7 is param count
164+
# Store the algorithm
165+
vector_attrs["algorithm"] = pos6_str
166+
# Skip to position 8 where key-value pairs start
167+
start_pos = 8
168+
145169
try:
146-
for i in range(6, len(attrs), 2):
170+
for i in range(start_pos, len(attrs), 2):
147171
if i + 1 < len(attrs):
148172
key = str(attrs[i]).lower()
149173
vector_attrs[key] = attrs[i + 1]
150174
except (IndexError, TypeError, ValueError):
175+
# Silently continue - we'll validate required fields below
151176
pass
152177

153178
# Normalize to expected field names
154179
normalized = {}
155180

156-
# Handle dims/dim field
181+
# Handle dims/dim field - REQUIRED for vector fields
157182
if "dim" in vector_attrs:
158183
normalized["dims"] = int(vector_attrs.pop("dim"))
159184
elif "dims" in vector_attrs:
160185
normalized["dims"] = int(vector_attrs["dims"])
186+
else:
187+
# If dims is missing from normal parsing, try scanning the raw attrs
188+
# This handles edge cases where the format is unexpected
189+
for i in range(6, len(attrs) - 1):
190+
if str(attrs[i]).upper() in ("DIM", "DIMS"):
191+
try:
192+
normalized["dims"] = int(attrs[i + 1])
193+
break
194+
except (ValueError, IndexError):
195+
pass
161196

162197
# Handle distance_metric field
163198
if "distance_metric" in vector_attrs:
@@ -178,10 +213,20 @@ def parse_vector_attrs(attrs):
178213
normalized["datatype"] = vector_attrs["data_type"].lower()
179214
elif "datatype" in vector_attrs:
180215
normalized["datatype"] = vector_attrs["datatype"].lower()
216+
elif "type" in vector_attrs:
217+
# Sometimes it's just "type" instead of "data_type"
218+
normalized["datatype"] = vector_attrs["type"].lower()
181219
else:
182220
# Default to float32 if missing
183221
normalized["datatype"] = "float32"
184222

223+
# Validate that we have required dims
224+
if "dims" not in normalized:
225+
raise ValueError(
226+
f"Could not parse required 'dims' parameter from vector field attributes. "
227+
f"Raw attrs: {attrs}, Parsed: {vector_attrs}"
228+
)
229+
185230
return normalized
186231

187232
def parse_attrs(attrs, field_type=None):

redisvl/schema/schema.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ class IndexInfo(BaseModel):
5858

5959
name: str
6060
"""The unique name of the index."""
61-
prefix: str = "rvl"
62-
"""The prefix used for Redis keys associated with this index."""
61+
prefix: Union[str, List[str]] = "rvl"
62+
"""The prefix(es) used for Redis keys associated with this index. Can be a single string or a list of strings."""
6363
key_separator: str = ":"
6464
"""The separator character used in designing Redis keys."""
6565
storage_type: StorageType = StorageType.HASH

tests/integration/test_search_index.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,73 @@ def test_search_index_from_existing_complex(client):
153153
assert index.schema == index2.schema
154154

155155

156+
def test_search_index_from_existing_multiple_prefixes(client):
157+
"""Test that from_existing correctly handles indices with multiple prefixes (issue #258)."""
158+
from redis.commands.search.field import TextField, VectorField
159+
160+
index_name = "test_multi_prefix"
161+
162+
# Create index manually using redis-py with multiple prefixes
163+
# This simulates an index created with: FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: ...
164+
try:
165+
# Clean up any existing index
166+
try:
167+
client.ft(index_name).dropindex(delete_documents=True)
168+
except Exception:
169+
pass
170+
171+
# Create index using raw FT.CREATE command with multiple prefixes
172+
# FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: SCHEMA user TAG text TEXT ...
173+
client.execute_command(
174+
"FT.CREATE",
175+
index_name,
176+
"ON",
177+
"HASH",
178+
"PREFIX",
179+
"3",
180+
"prefix_a:",
181+
"prefix_b:",
182+
"prefix_c:",
183+
"SCHEMA",
184+
"user",
185+
"TAG",
186+
"text",
187+
"TEXT",
188+
"embedding",
189+
"VECTOR",
190+
"FLAT",
191+
"6",
192+
"TYPE",
193+
"FLOAT32",
194+
"DIM",
195+
"3",
196+
"DISTANCE_METRIC",
197+
"COSINE",
198+
)
199+
200+
# Now test from_existing - this is where the bug was
201+
loaded_index = SearchIndex.from_existing(index_name, redis_client=client)
202+
203+
# Verify all prefixes are preserved (this was failing before fix)
204+
# Before the fix, only "prefix_a:" would be returned
205+
assert loaded_index.schema.index.prefix == [
206+
"prefix_a:",
207+
"prefix_b:",
208+
"prefix_c:",
209+
]
210+
211+
# Verify the index name and storage type
212+
assert loaded_index.schema.index.name == index_name
213+
assert loaded_index.schema.index.storage_type.value == "hash"
214+
215+
finally:
216+
# Cleanup
217+
try:
218+
client.ft(index_name).dropindex(delete_documents=True)
219+
except Exception:
220+
pass
221+
222+
156223
def test_search_index_no_prefix(index_schema):
157224
# specify an explicitly empty prefix...
158225
index_schema.index.prefix = ""
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
"""Unit tests for convert_index_info_to_schema function."""
2+
3+
import pytest
4+
5+
from redisvl.redis.connection import convert_index_info_to_schema
6+
7+
8+
def test_convert_index_info_single_prefix():
9+
"""Test converting index info with a single prefix.
10+
11+
Single-element prefix lists are normalized to strings for backward compatibility.
12+
"""
13+
index_info = {
14+
"index_name": "test_index",
15+
"index_definition": [
16+
"key_type",
17+
"HASH",
18+
"prefixes",
19+
["prefix_a"],
20+
],
21+
"attributes": [],
22+
}
23+
24+
result = convert_index_info_to_schema(index_info)
25+
26+
assert result["index"]["name"] == "test_index"
27+
assert result["index"]["prefix"] == "prefix_a" # Normalized to string
28+
assert result["index"]["storage_type"] == "hash"
29+
30+
31+
def test_convert_index_info_multiple_prefixes():
32+
"""Test converting index info with multiple prefixes (issue #258)."""
33+
index_info = {
34+
"index_name": "test_index",
35+
"index_definition": [
36+
"key_type",
37+
"HASH",
38+
"prefixes",
39+
["prefix_a", "prefix_b", "prefix_c"],
40+
],
41+
"attributes": [],
42+
}
43+
44+
result = convert_index_info_to_schema(index_info)
45+
46+
assert result["index"]["name"] == "test_index"
47+
assert result["index"]["prefix"] == ["prefix_a", "prefix_b", "prefix_c"]
48+
assert result["index"]["storage_type"] == "hash"
49+
50+
51+
def test_convert_index_info_json_storage():
52+
"""Test converting index info with JSON storage type.
53+
54+
Single-element prefix lists are normalized to strings for backward compatibility.
55+
"""
56+
index_info = {
57+
"index_name": "test_json_index",
58+
"index_definition": [
59+
"key_type",
60+
"JSON",
61+
"prefixes",
62+
["json_prefix"],
63+
],
64+
"attributes": [],
65+
}
66+
67+
result = convert_index_info_to_schema(index_info)
68+
69+
assert result["index"]["name"] == "test_json_index"
70+
assert result["index"]["prefix"] == "json_prefix" # Normalized to string
71+
assert result["index"]["storage_type"] == "json"
72+
73+
74+
def test_convert_index_info_with_fields():
75+
"""Test converting index info with field definitions."""
76+
index_info = {
77+
"index_name": "test_index",
78+
"index_definition": [
79+
"key_type",
80+
"HASH",
81+
"prefixes",
82+
["prefix_a", "prefix_b"],
83+
],
84+
"attributes": [
85+
[
86+
"identifier",
87+
"user",
88+
"attribute",
89+
"user",
90+
"type",
91+
"TAG",
92+
],
93+
[
94+
"identifier",
95+
"text",
96+
"attribute",
97+
"text",
98+
"type",
99+
"TEXT",
100+
],
101+
],
102+
}
103+
104+
result = convert_index_info_to_schema(index_info)
105+
106+
assert result["index"]["name"] == "test_index"
107+
assert result["index"]["prefix"] == ["prefix_a", "prefix_b"]
108+
assert len(result["fields"]) == 2
109+
assert result["fields"][0]["name"] == "user"
110+
assert result["fields"][0]["type"] == "tag"
111+
assert result["fields"][1]["name"] == "text"
112+
assert result["fields"][1]["type"] == "text"

0 commit comments

Comments
 (0)