fix: support multiple prefixes in SearchIndex.from_existing() (#258)

bsbodden · bsbodden · commit ec463ccf90f2 · 2025-09-29T22:14:51.000-07:00
Fixed bug in convert_index_info_to_schema() where only the first prefix
was captured from Redis indices with multiple prefixes.
Updated code to handle the Union[str, List[str]] prefix type: when
constructing Redis keys, a list of prefixes is normalized to its first
element. This maintains backward compatibility while supporting multiple
prefixes in the schema definition.

- Added normalization in prefix property (index.py)
- Normalized prefix in _create_key method (storage.py)
- Updated key() method to use normalized prefix property

Maintains backward compatibility by converting single-element prefix
lists to strings when loading from Redis. This ensures schema comparisons
work correctly when comparing existing indices with new configurations.

- Updated convert_index_info_to_schema to normalize single prefixes
- Updated unit tests to reflect normalization behavior
- Fixed schema comparison issues in semantic router and cache extensions
- Fixed vector field parsing to support both Redis 6.2.x and 7.x+ formats:

Redis 6.2.x format:
  [..., "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...]
  Position 6: algorithm value (FLAT/HNSW)
  Position 7: param count
  Position 8+: key-value pairs

Redis 7.x+ format:
  [..., "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...]
  Position 6+: all key-value pairs

The parser now detects the format by checking if position 6 is an algorithm
value (FLAT/HNSW) vs a key. For the old format, it stores the algorithm and
starts parsing key-value pairs from position 8.

Also added fallback logic to scan raw attrs if dims is not found through
normal parsing, and better handling of the vector datatype attribute,
whose key may be "data_type", "datatype", or just "type".

Validates that dims is present and raises a clear ValueError if it is
missing.
diff --git a/redisvl/index/index.py b/redisvl/index/index.py
@@ -245,8 +245,10 @@ def name(self) -> str:
     @property
     def prefix(self) -> str:
         """The optional key prefix that comes before a unique key value in
-        forming a Redis key."""
-        return self.schema.index.prefix
+        forming a Redis key. If multiple prefixes are configured, returns the
+        first one."""
+        prefix = self.schema.index.prefix
+        return prefix[0] if isinstance(prefix, list) else prefix
 
     @property
     def key_separator(self) -> str:
@@ -329,7 +331,7 @@ def key(self, id: str) -> str:
         """
         return self._storage._key(
             id=id,
-            prefix=self.schema.index.prefix,
+            prefix=self.prefix,
             key_separator=self.schema.index.key_separator,
         )
 
diff --git a/redisvl/index/storage.py b/redisvl/index/storage.py
@@ -114,9 +114,13 @@ def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> st
             except KeyError:
                 raise ValueError(f"Key field {id_field} not found in record {obj}")
 
+        # Normalize prefix: use first prefix if multiple are configured
+        prefix = self.index_schema.index.prefix
+        normalized_prefix = prefix[0] if isinstance(prefix, list) else prefix
+
         return self._key(
             key_value,
-            prefix=self.index_schema.index.prefix,
+            prefix=normalized_prefix,
             key_separator=self.index_schema.index.key_separator,
         )
 
diff --git a/redisvl/redis/connection.py b/redisvl/redis/connection.py
@@ -133,31 +133,66 @@ def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]:
         Dict[str, Any]: Schema dictionary.
     """
     index_name = index_info["index_name"]
-    prefixes = index_info["index_definition"][3][0]
+    prefixes = index_info["index_definition"][3]
+    # Normalize single-element prefix lists to string for backward compatibility
+    if isinstance(prefixes, list) and len(prefixes) == 1:
+        prefixes = prefixes[0]
     storage_type = index_info["index_definition"][1].lower()
 
     index_fields = index_info["attributes"]
 
     def parse_vector_attrs(attrs):
         # Parse vector attributes from Redis FT.INFO output
-        # Attributes start at position 6 as key-value pairs
+        # Format varies between Redis versions:
+        # - Redis 6.2.x: [... "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...]
+        #   Position 6: algorithm value (e.g., "FLAT" or "HNSW")
+        #   Position 7: param count
+        #   Position 8+: key-value pairs
+        # - Redis 7.x+: [... "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...]
+        #   Position 6+: all key-value pairs
+
         vector_attrs = {}
+        start_pos = 6
+
+        # Detect format: if position 6 looks like an algorithm value (not a key),
+        # we're dealing with the older format
+        if len(attrs) > 6:
+            pos6_str = str(attrs[6]).upper()
+            # Check if position 6 is an algorithm value (FLAT, HNSW) vs a key (ALGORITHM, TYPE, DIM)
+            if pos6_str in ("FLAT", "HNSW"):
+                # Old format (Redis 6.2.x): position 6 is algorithm value, position 7 is param count
+                # Store the algorithm
+                vector_attrs["algorithm"] = pos6_str
+                # Skip to position 8 where key-value pairs start
+                start_pos = 8
+
         try:
-            for i in range(6, len(attrs), 2):
+            for i in range(start_pos, len(attrs), 2):
                 if i + 1 < len(attrs):
                     key = str(attrs[i]).lower()
                     vector_attrs[key] = attrs[i + 1]
         except (IndexError, TypeError, ValueError):
+            # Silently continue - we'll validate required fields below
             pass
 
         # Normalize to expected field names
         normalized = {}
 
-        # Handle dims/dim field
+        # Handle dims/dim field - REQUIRED for vector fields
         if "dim" in vector_attrs:
             normalized["dims"] = int(vector_attrs.pop("dim"))
         elif "dims" in vector_attrs:
             normalized["dims"] = int(vector_attrs["dims"])
+        else:
+            # If dims is missing from normal parsing, try scanning the raw attrs
+            # This handles edge cases where the format is unexpected
+            for i in range(6, len(attrs) - 1):
+                if str(attrs[i]).upper() in ("DIM", "DIMS"):
+                    try:
+                        normalized["dims"] = int(attrs[i + 1])
+                        break
+                    except (ValueError, IndexError):
+                        pass
 
         # Handle distance_metric field
         if "distance_metric" in vector_attrs:
@@ -178,10 +213,20 @@ def parse_vector_attrs(attrs):
             normalized["datatype"] = vector_attrs["data_type"].lower()
         elif "datatype" in vector_attrs:
             normalized["datatype"] = vector_attrs["datatype"].lower()
+        elif "type" in vector_attrs:
+            # Sometimes it's just "type" instead of "data_type"
+            normalized["datatype"] = vector_attrs["type"].lower()
         else:
             # Default to float32 if missing
             normalized["datatype"] = "float32"
 
+        # Validate that we have required dims
+        if "dims" not in normalized:
+            raise ValueError(
+                f"Could not parse required 'dims' parameter from vector field attributes. "
+                f"Raw attrs: {attrs}, Parsed: {vector_attrs}"
+            )
+
         return normalized
 
     def parse_attrs(attrs, field_type=None):
diff --git a/redisvl/schema/schema.py b/redisvl/schema/schema.py
@@ -58,8 +58,8 @@ class IndexInfo(BaseModel):
 
     name: str
     """The unique name of the index."""
-    prefix: str = "rvl"
-    """The prefix used for Redis keys associated with this index."""
+    prefix: Union[str, List[str]] = "rvl"
+    """The prefix(es) used for Redis keys associated with this index. Can be a single string or a list of strings."""
     key_separator: str = ":"
     """The separator character used in designing Redis keys."""
     storage_type: StorageType = StorageType.HASH
diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py
@@ -153,6 +153,73 @@ def test_search_index_from_existing_complex(client):
     assert index.schema == index2.schema
 
 
+def test_search_index_from_existing_multiple_prefixes(client):
+    """Test that from_existing correctly handles indices with multiple prefixes (issue #258)."""
+    from redis.commands.search.field import TextField, VectorField
+
+    index_name = "test_multi_prefix"
+
+    # Create index manually using redis-py with multiple prefixes
+    # This simulates an index created with: FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: ...
+    try:
+        # Clean up any existing index
+        try:
+            client.ft(index_name).dropindex(delete_documents=True)
+        except Exception:
+            pass
+
+        # Create index using raw FT.CREATE command with multiple prefixes
+        # FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: SCHEMA user TAG text TEXT ...
+        client.execute_command(
+            "FT.CREATE",
+            index_name,
+            "ON",
+            "HASH",
+            "PREFIX",
+            "3",
+            "prefix_a:",
+            "prefix_b:",
+            "prefix_c:",
+            "SCHEMA",
+            "user",
+            "TAG",
+            "text",
+            "TEXT",
+            "embedding",
+            "VECTOR",
+            "FLAT",
+            "6",
+            "TYPE",
+            "FLOAT32",
+            "DIM",
+            "3",
+            "DISTANCE_METRIC",
+            "COSINE",
+        )
+
+        # Now test from_existing - this is where the bug was
+        loaded_index = SearchIndex.from_existing(index_name, redis_client=client)
+
+        # Verify all prefixes are preserved (this was failing before fix)
+        # Before the fix, only "prefix_a:" would be returned
+        assert loaded_index.schema.index.prefix == [
+            "prefix_a:",
+            "prefix_b:",
+            "prefix_c:",
+        ]
+
+        # Verify the index name and storage type
+        assert loaded_index.schema.index.name == index_name
+        assert loaded_index.schema.index.storage_type.value == "hash"
+
+    finally:
+        # Cleanup
+        try:
+            client.ft(index_name).dropindex(delete_documents=True)
+        except Exception:
+            pass
+
+
 def test_search_index_no_prefix(index_schema):
     # specify an explicitly empty prefix...
     index_schema.index.prefix = ""
diff --git a/tests/unit/test_convert_index_info.py b/tests/unit/test_convert_index_info.py
@@ -0,0 +1,112 @@
+"""Unit tests for convert_index_info_to_schema function."""
+
+import pytest
+
+from redisvl.redis.connection import convert_index_info_to_schema
+
+
+def test_convert_index_info_single_prefix():
+    """Test converting index info with a single prefix.
+
+    Single-element prefix lists are normalized to strings for backward compatibility.
+    """
+    index_info = {
+        "index_name": "test_index",
+        "index_definition": [
+            "key_type",
+            "HASH",
+            "prefixes",
+            ["prefix_a"],
+        ],
+        "attributes": [],
+    }
+
+    result = convert_index_info_to_schema(index_info)
+
+    assert result["index"]["name"] == "test_index"
+    assert result["index"]["prefix"] == "prefix_a"  # Normalized to string
+    assert result["index"]["storage_type"] == "hash"
+
+
+def test_convert_index_info_multiple_prefixes():
+    """Test converting index info with multiple prefixes (issue #258)."""
+    index_info = {
+        "index_name": "test_index",
+        "index_definition": [
+            "key_type",
+            "HASH",
+            "prefixes",
+            ["prefix_a", "prefix_b", "prefix_c"],
+        ],
+        "attributes": [],
+    }
+
+    result = convert_index_info_to_schema(index_info)
+
+    assert result["index"]["name"] == "test_index"
+    assert result["index"]["prefix"] == ["prefix_a", "prefix_b", "prefix_c"]
+    assert result["index"]["storage_type"] == "hash"
+
+
+def test_convert_index_info_json_storage():
+    """Test converting index info with JSON storage type.
+
+    Single-element prefix lists are normalized to strings for backward compatibility.
+    """
+    index_info = {
+        "index_name": "test_json_index",
+        "index_definition": [
+            "key_type",
+            "JSON",
+            "prefixes",
+            ["json_prefix"],
+        ],
+        "attributes": [],
+    }
+
+    result = convert_index_info_to_schema(index_info)
+
+    assert result["index"]["name"] == "test_json_index"
+    assert result["index"]["prefix"] == "json_prefix"  # Normalized to string
+    assert result["index"]["storage_type"] == "json"
+
+
+def test_convert_index_info_with_fields():
+    """Test converting index info with field definitions."""
+    index_info = {
+        "index_name": "test_index",
+        "index_definition": [
+            "key_type",
+            "HASH",
+            "prefixes",
+            ["prefix_a", "prefix_b"],
+        ],
+        "attributes": [
+            [
+                "identifier",
+                "user",
+                "attribute",
+                "user",
+                "type",
+                "TAG",
+            ],
+            [
+                "identifier",
+                "text",
+                "attribute",
+                "text",
+                "type",
+                "TEXT",
+            ],
+        ],
+    }
+
+    result = convert_index_info_to_schema(index_info)
+
+    assert result["index"]["name"] == "test_index"
+    assert result["index"]["prefix"] == ["prefix_a", "prefix_b"]
+    assert len(result["fields"]) == 2
+    assert result["fields"][0]["name"] == "user"
+    assert result["fields"][0]["type"] == "tag"
+    assert result["fields"][1]["name"] == "text"
+    assert result["fields"][1]["type"] == "text"