Skip to content

Commit 6157e4a

Browse files
committed
fix: improve key separator handling to avoid double separators (#368)
Normalize key prefixes by removing trailing separators when constructing Redis keys to prevent double separator issues (e.g., "user::123" becomes "user:123"). Changes: - Fix BaseStorage._key() to strip trailing separators from prefix - Update SemanticRouter to use index's key_separator instead of hardcoded ':' - Extract route pattern generation into reusable _route_pattern() method - Fix scan pattern generation to respect custom separators
1 parent 547085a commit 6157e4a

File tree

3 files changed

+318
-10
lines changed

3 files changed

+318
-10
lines changed

redisvl/extensions/router/semantic.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,25 @@ def update_route_thresholds(self, route_thresholds: Dict[str, Optional[float]]):
221221

222222
@staticmethod
223223
def _route_ref_key(index: SearchIndex, route_name: str, reference_hash: str) -> str:
224-
"""Generate the route reference key."""
225-
return f"{index.prefix}:{route_name}:{reference_hash}"
224+
"""Generate the route reference key using the index's key_separator."""
225+
sep = index.key_separator
226+
# Normalize prefix to avoid double separators
227+
prefix = index.prefix.rstrip(sep) if sep and index.prefix else index.prefix
228+
if prefix:
229+
return f"{prefix}{sep}{route_name}{sep}{reference_hash}"
230+
else:
231+
return f"{route_name}{sep}{reference_hash}"
232+
233+
@staticmethod
234+
def _route_pattern(index: SearchIndex, route_name: str) -> str:
235+
"""Generate a search pattern for route references."""
236+
sep = index.key_separator
237+
# Normalize prefix to avoid double separators
238+
prefix = index.prefix.rstrip(sep) if sep and index.prefix else index.prefix
239+
if prefix:
240+
return f"{prefix}{sep}{route_name}{sep}*"
241+
else:
242+
return f"{route_name}{sep}*"
226243

227244
def _add_routes(self, routes: List[Route]):
228245
"""Add routes to the router and index.
@@ -731,12 +748,12 @@ def get_route_references(
731748
queries = self._make_filter_queries(reference_ids)
732749
elif route_name:
733750
if not keys:
734-
keys = scan_by_pattern(
735-
self._index.client, f"{self._index.prefix}:{route_name}:*" # type: ignore
736-
)
751+
pattern = self._route_pattern(self._index, route_name)
752+
keys = scan_by_pattern(self._index.client, pattern) # type: ignore
737753

754+
sep = self._index.key_separator
738755
queries = self._make_filter_queries(
739-
[key.split(":")[-1] for key in convert_bytes(keys)]
756+
[key.split(sep)[-1] for key in convert_bytes(keys)]
740757
)
741758
else:
742759
raise ValueError(
@@ -769,9 +786,8 @@ def delete_route_references(
769786
res = self._index.batch_query(queries)
770787
keys = [r[0]["id"] for r in res if len(r) > 0]
771788
elif not keys:
772-
keys = scan_by_pattern(
773-
self._index.client, f"{self._index.prefix}:{route_name}:*" # type: ignore
774-
)
789+
pattern = self._route_pattern(self._index, route_name)
790+
keys = scan_by_pattern(self._index.client, pattern) # type: ignore
775791

776792
if not keys:
777793
raise ValueError(f"No references found for route {route_name}")

redisvl/index/storage.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,15 @@ def _key(id: str, prefix: str, key_separator: str) -> str:
8181
if not prefix:
8282
return id
8383
else:
84-
return f"{prefix}{key_separator}{id}"
84+
# Normalize prefix by removing trailing separators to avoid doubles
85+
normalized_prefix = (
86+
prefix.rstrip(key_separator) if key_separator else prefix
87+
)
88+
if normalized_prefix:
89+
return f"{normalized_prefix}{key_separator}{id}"
90+
else:
91+
# If prefix was only separators, just return the id
92+
return id
8593

8694
def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> str:
8795
"""Construct a Redis key for a given object, optionally using a
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
"""
2+
Test proper handling of key separators and prefixes.
3+
TDD test file for issue #368.
4+
5+
These tests verify that key separators are handled correctly when:
6+
1. Prefix ends with the separator
7+
2. Custom separators are used
8+
3. Keys are constructed in different components
9+
"""
10+
11+
import pytest
12+
from redis import Redis
13+
from redis.commands.search.index_definition import IndexType
14+
15+
from redisvl.extensions.router import Route, SemanticRouter
16+
from redisvl.index import SearchIndex
17+
from redisvl.index.storage import HashStorage, JsonStorage
18+
from redisvl.schema import IndexSchema
19+
20+
21+
class TestKeySeparatorHandling:
22+
"""Tests for proper key separator handling across the codebase."""
23+
24+
def test_prefix_ending_with_separator_no_double_separator(self):
25+
"""Test that prefix ending with separator doesn't create double separators."""
26+
# Create schema with prefix ending in separator
27+
schema_dict = {
28+
"index": {
29+
"name": "test_index",
30+
"prefix": "user:", # Prefix ends with separator
31+
"key_separator": ":",
32+
"storage_type": "hash",
33+
},
34+
"fields": [{"name": "content", "type": "text"}],
35+
}
36+
schema = IndexSchema.from_dict(schema_dict)
37+
storage = HashStorage(index_schema=schema)
38+
39+
# Create a key
40+
key = storage._key("123", schema.index.prefix, schema.index.key_separator)
41+
42+
# Should not have double separator
43+
assert key == "user:123", f"Expected 'user:123' but got '{key}'"
44+
assert "::" not in key, f"Key has double separator: {key}"
45+
46+
def test_custom_separator_used_consistently(self):
47+
"""Test that custom key_separator is used throughout."""
48+
# Create schema with custom separator
49+
schema_dict = {
50+
"index": {
51+
"name": "test_index",
52+
"prefix": "user",
53+
"key_separator": "-", # Custom separator
54+
"storage_type": "json",
55+
},
56+
"fields": [{"name": "content", "type": "text"}],
57+
}
58+
schema = IndexSchema.from_dict(schema_dict)
59+
storage = JsonStorage(index_schema=schema)
60+
61+
# Create a key with custom separator
62+
key = storage._key("456", schema.index.prefix, schema.index.key_separator)
63+
64+
# Should use custom separator
65+
assert key == "user-456", f"Expected 'user-456' but got '{key}'"
66+
assert ":" not in key, f"Key uses default separator instead of custom: {key}"
67+
68+
def test_empty_prefix_handled_correctly(self):
69+
"""Test that empty prefix is handled correctly."""
70+
schema_dict = {
71+
"index": {
72+
"name": "test_index",
73+
"prefix": "", # Empty prefix
74+
"key_separator": ":",
75+
"storage_type": "hash",
76+
},
77+
"fields": [{"name": "content", "type": "text"}],
78+
}
79+
schema = IndexSchema.from_dict(schema_dict)
80+
storage = HashStorage(index_schema=schema)
81+
82+
# Create a key with empty prefix
83+
key = storage._key("789", schema.index.prefix, schema.index.key_separator)
84+
85+
# Should return just the ID without prefix or separator
86+
assert key == "789", f"Expected '789' but got '{key}'"
87+
88+
def test_semantic_router_uses_index_separator(self, redis_url):
89+
"""Test that SemanticRouter uses the index's key_separator."""
90+
# Create a route
91+
route = Route(
92+
name="test_route", references=["hello", "hi"], distance_threshold=0.5
93+
)
94+
95+
# Create router with routes
96+
router = SemanticRouter(
97+
name="test_router_sep",
98+
routes=[route],
99+
redis_url=redis_url,
100+
overwrite=True,
101+
)
102+
103+
# Modify the index schema to use custom separator
104+
router._index.schema.index.key_separator = "|"
105+
router._index.schema.index.prefix = "router"
106+
107+
# Check that route reference keys use the custom separator
108+
route_key = router._route_ref_key(router._index, "test_route", "ref123")
109+
110+
# Should use custom separator
111+
assert "|" in route_key, f"Route key doesn't use custom separator: {route_key}"
112+
assert (
113+
route_key.count(":") == 0
114+
), f"Route key uses default separator: {route_key}"
115+
assert (
116+
route_key == "router|test_route|ref123"
117+
), f"Unexpected route key: {route_key}"
118+
119+
def test_prefix_with_separator_and_custom_separator(self):
120+
"""Test handling when prefix contains old separator and we use a new one."""
121+
schema_dict = {
122+
"index": {
123+
"name": "test_index",
124+
"prefix": "app:user", # Prefix contains ':'
125+
"key_separator": "-", # But we use '-' as separator
126+
"storage_type": "hash",
127+
},
128+
"fields": [{"name": "content", "type": "text"}],
129+
}
130+
schema = IndexSchema.from_dict(schema_dict)
131+
storage = HashStorage(index_schema=schema)
132+
133+
# Create a key
134+
key = storage._key("999", schema.index.prefix, schema.index.key_separator)
135+
136+
# Should use the key_separator, not the : in prefix
137+
assert key == "app:user-999", f"Expected 'app:user-999' but got '{key}'"
138+
139+
def test_special_characters_in_separator(self):
140+
"""Test that special characters work as separators."""
141+
special_separators = ["_", "::", "->", ".", "/"]
142+
143+
for sep in special_separators:
144+
schema_dict = {
145+
"index": {
146+
"name": "test_index",
147+
"prefix": "data",
148+
"key_separator": sep,
149+
"storage_type": "json",
150+
},
151+
"fields": [{"name": "content", "type": "text"}],
152+
}
153+
schema = IndexSchema.from_dict(schema_dict)
154+
storage = JsonStorage(index_schema=schema)
155+
156+
key = storage._key("id", schema.index.prefix, schema.index.key_separator)
157+
expected = f"data{sep}id"
158+
assert (
159+
key == expected
160+
), f"For separator '{sep}': expected '{expected}' but got '{key}'"
161+
162+
def test_trailing_separator_normalization(self):
163+
"""Test that trailing separators in prefix are normalized."""
164+
test_cases = [
165+
("user:", ":", "123", "user:123"), # Prefix ends with separator
166+
("user::", ":", "456", "user:456"), # Prefix ends with double separator
167+
("user", ":", "789", "user:789"), # Normal case
168+
("user-", "-", "abc", "user-abc"), # Custom separator
169+
]
170+
171+
for prefix, separator, id_val, expected in test_cases:
172+
schema_dict = {
173+
"index": {
174+
"name": "test_index",
175+
"prefix": prefix,
176+
"key_separator": separator,
177+
"storage_type": "hash",
178+
},
179+
"fields": [{"name": "content", "type": "text"}],
180+
}
181+
schema = IndexSchema.from_dict(schema_dict)
182+
storage = HashStorage(index_schema=schema)
183+
184+
key = storage._key(id_val, schema.index.prefix, schema.index.key_separator)
185+
186+
# Check for expected normalization
187+
assert (
188+
key == expected
189+
), f"For prefix='{prefix}', sep='{separator}', id='{id_val}': expected '{expected}' but got '{key}'"
190+
191+
192+
class TestSemanticRouterKeyConstruction:
193+
"""Test SemanticRouter's key construction with separators."""
194+
195+
def test_router_respects_modified_key_separator(self, redis_url):
196+
"""Test that SemanticRouter respects modified key separators."""
197+
route = Route(
198+
name="test_route", references=["hello", "hi"], distance_threshold=0.5
199+
)
200+
201+
router = SemanticRouter(
202+
name="router_sep_test",
203+
routes=[route],
204+
redis_url=redis_url,
205+
overwrite=True,
206+
)
207+
208+
# Test with different separators
209+
for separator in [":", "-", "_", "|"]:
210+
router._index.schema.index.key_separator = separator
211+
router._index.schema.index.prefix = "routes"
212+
213+
# Test internal key generation
214+
route_key = router._route_ref_key(router._index, "route1", "ref1")
215+
216+
# Should use the configured separator
217+
expected = f"routes{separator}route1{separator}ref1"
218+
assert (
219+
route_key == expected
220+
), f"For sep '{separator}': Expected '{expected}' but got '{route_key}'"
221+
222+
def test_router_with_prefix_ending_in_separator(self, redis_url):
223+
"""Test SemanticRouter when prefix ends with separator."""
224+
route = Route(
225+
name="test_route", references=["hello", "hi"], distance_threshold=0.5
226+
)
227+
228+
router = SemanticRouter(
229+
name="router_trailing_test",
230+
routes=[route],
231+
redis_url=redis_url,
232+
overwrite=True,
233+
)
234+
235+
# Modify to have prefix ending with separator
236+
router._index.schema.index.prefix = "routes:"
237+
router._index.schema.index.key_separator = ":"
238+
239+
# Generate a route key
240+
route_key = router._route_ref_key(router._index, "route1", "ref1")
241+
242+
# Should not have double separator
243+
assert "::" not in route_key, f"Route key has double separator: {route_key}"
244+
assert route_key == "routes:route1:ref1", f"Unexpected route key: {route_key}"
245+
246+
247+
class TestSearchIndexKeyConstruction:
248+
"""Test SearchIndex's key construction with separators."""
249+
250+
def test_search_index_key_construction(self, redis_url):
251+
"""Test that SearchIndex properly handles key construction."""
252+
schema_dict = {
253+
"index": {
254+
"name": "search_test",
255+
"prefix": "doc:", # Ends with separator
256+
"key_separator": ":",
257+
"storage_type": "hash",
258+
},
259+
"fields": [
260+
{"name": "text", "type": "text"},
261+
{"name": "tag", "type": "tag"},
262+
],
263+
}
264+
265+
index = SearchIndex(
266+
IndexSchema.from_dict(schema_dict),
267+
redis_url=redis_url,
268+
)
269+
index.create(overwrite=True)
270+
271+
# Add a document
272+
data = [{"id": "123", "text": "test content", "tag": "test"}]
273+
keys = index.load(data, id_field="id")
274+
275+
# Check the generated key
276+
assert len(keys) == 1
277+
key = keys[0]
278+
279+
# Should not have double separator
280+
assert "::" not in key, f"Key has double separator: {key}"
281+
assert key == "doc:123", f"Expected 'doc:123' but got '{key}'"
282+
283+
# Clean up
284+
index.delete(drop=True)

0 commit comments

Comments
 (0)