Adding Vector Index Shard Key for DiskANN and quantizedFlat (Azure#39512)

andrewmathew1 · Andrew Mathew · web-flow · commit 03d626a3c3a7 · 2025-02-13T12:13:25.000-05:00
* added vectorIndexShardKey to quantizedFlat and diskANN

* added tests for vectorIndexShardKey

* added testing for invalid type for vectorIndexShardKey and range of QuantizationByteSize

* removed **provisional** from vector embedding policy

* added specific index types in readme for vectorIndexShardKey

* updated error message for quantizationByteSize out of range

---------

Co-authored-by: Andrew Mathew <andrewmathew@microsoft.com>
diff --git a/sdk/cosmos/azure-cosmos/README.md b/sdk/cosmos/azure-cosmos/README.md
@@ -714,13 +714,16 @@ For vector index types of diskANN and quantizedFlat, there are additional option
 quantizationByteSize - the number of bytes used in product quantization of the vectors. A larger value may result in better recall for vector searches at the expense of latency. This applies to index types diskANN and quantizedFlat. The allowed range is between 1 and the minimum between 512 and the vector dimensions. The default value is 64.
 
 indexingSearchListSize - which represents the size of the candidate list of approximate neighbors stored while building the diskANN index as part of the optimization processes. This applies only to index type diskANN. The allowed range is between 25 and 500.
+
+vectorIndexShardKey - a list of strings containing the shard keys used for partitioning vector indexes. The maximum allowed size for this array is 1, meaning that there is only one allowed path. This applies to index types diskANN and quantizedFlat.
 ```python
 indexing_policy = {
         "automatic": True,
         "indexingMode": "consistent",
         "vectorIndexes": [
             {"path": "/vector1", "type": "quantizedFlat", "quantizationByteSize": 8},
-            {"path": "/vector2", "type": "diskANN", "indexingSearchListSize": 50}
+            {"path": "/vector2", "type": "diskANN", "indexingSearchListSize": 50},
+            {"path": "/vector3", "type": "diskANN", "vectorIndexShardKey": ["/country/city"]}
         ]
     }
 ```
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py
@@ -333,7 +333,7 @@ async def create_container_if_not_exists(
         :keyword int analytical_storage_ttl: Analytical store time to live (TTL) for items in the container.  A value of
             None leaves analytical storage off and a value of -1 turns analytical storage on with no TTL. Please
             note that analytical storage can only be enabled on Synapse Link enabled accounts.
-        :keyword Dict[str, Any] vector_embedding_policy: **provisional** The vector embedding policy for the container.
+        :keyword Dict[str, Any] vector_embedding_policy: The vector embedding policy for the container.
             Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying
             data type, and is generated for a particular distance function.
         :keyword Dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/database.py b/sdk/cosmos/azure-cosmos/azure/cosmos/database.py
@@ -204,7 +204,7 @@ def create_container(  # pylint:disable=docstring-missing-param
         :keyword List[Dict[str, str]] computed_properties: Sets The computed properties for this
             container in the Azure Cosmos DB Service. For more Information on how to use computed properties visit
             `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet`
-        :keyword Dict[str, Any] vector_embedding_policy: **provisional** The vector embedding policy for the container.
+        :keyword Dict[str, Any] vector_embedding_policy: The vector embedding policy for the container.
             Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying
             data type, and is generated for a particular distance function.
         :keyword Dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to
diff --git a/sdk/cosmos/azure-cosmos/samples/index_management.py b/sdk/cosmos/azure-cosmos/samples/index_management.py
@@ -689,7 +689,7 @@ def use_vector_embedding_policy(db):
         indexing_policy = {
             "vectorIndexes": [
                 {"path": "/vector", "type": "quantizedFlat", "quantizationByteSize": 8},
-                {"path": "/vector2", "type": "diskANN", "indexingSearchListSize": 50}
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["/city"], "indexingSearchListSize": 50}
             ]
         }
         vector_embedding_policy = {
diff --git a/sdk/cosmos/azure-cosmos/samples/index_management_async.py b/sdk/cosmos/azure-cosmos/samples/index_management_async.py
@@ -684,7 +684,7 @@ async def use_vector_embedding_policy(db):
         indexing_policy = {
             "vectorIndexes": [
                 {"path": "/vector", "type": "quantizedFlat", "quantizationByteSize": 8},
-                {"path": "/vector2", "type": "diskANN", "indexingSearchListSize": 50}
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["/city"], "indexingSearchListSize": 50}
             ]
         }
         vector_embedding_policy = {
diff --git a/sdk/cosmos/azure-cosmos/test/test_vector_policy.py b/sdk/cosmos/azure-cosmos/test/test_vector_policy.py
@@ -40,7 +40,7 @@ def test_create_vector_embedding_container(self):
             "vectorIndexes": [
                 {"path": "/vector1", "type": "flat"},
                 {"path": "/vector2", "type": "quantizedFlat", "quantizationByteSize": 8},
-                {"path": "/vector3", "type": "diskANN", "quantizationByteSize": 8, "indexingSearchListSize": 50}
+                {"path": "/vector3", "type": "diskANN", "quantizationByteSize": 8, "vectorIndexShardKey": ["/city"], "indexingSearchListSize": 50}
             ]
         }
         vector_embedding_policy = {
@@ -77,6 +77,23 @@ def test_create_vector_embedding_container(self):
         assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"]
         self.test_db.delete_container(container_id)
 
+        # Pass a vector indexing policy with hierarchical vectorIndexShardKey value
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector2", "type": "diskANN", 'quantizationByteSize': 64, 'indexingSearchListSize': 100, "vectorIndexShardKey": ["/country/city"]}]
+        }
+        container_id = "vector_container" + str(uuid.uuid4())
+        created_container = self.test_db.create_container(
+            id=container_id,
+            partition_key=PartitionKey(path="/id"),
+            indexing_policy=indexing_policy,
+            vector_embedding_policy=vector_embedding_policy
+        )
+        properties = created_container.read()
+        assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy
+        assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"]
+        self.test_db.delete_container(container_id)
+
     def test_fail_create_vector_indexing_policy(self):
         vector_embedding_policy = {
             "vectorEmbeddings": [
@@ -85,7 +102,15 @@ def test_fail_create_vector_indexing_policy(self):
                     "dataType": "float32",
                     "dimensions": 256,
                     "distanceFunction": "euclidean"
-                }]}
+                },
+                {
+                    "path": "/vector2",
+                    "dataType": "int8",
+                    "dimensions": 200,
+                    "distanceFunction": "dotproduct"
+                }
+            ]
+        }
 
         # Pass a vector indexing policy without embedding policy
         indexing_policy = {
@@ -123,7 +148,7 @@ def test_fail_create_vector_indexing_policy(self):
         # Pass a vector indexing policy with non-matching path
         indexing_policy = {
             "vectorIndexes": [
-                {"path": "/vector2", "type": "flat"}]
+                {"path": "/vector3", "type": "flat"}]
         }
         try:
             self.test_db.create_container(
@@ -135,7 +160,7 @@ def test_fail_create_vector_indexing_policy(self):
             pytest.fail("Container creation should have failed for index mismatch.")
         except exceptions.CosmosHttpResponseError as e:
             assert e.status_code == 400
-            assert "vector2 not matching in Embedding's path" in e.http_error_message
+            assert "vector3 not matching in Embedding's path" in e.http_error_message
 
         # Pass a vector indexing policy with wrong quantizationByteSize value
         indexing_policy = {
@@ -152,7 +177,7 @@ def test_fail_create_vector_indexing_policy(self):
             pytest.fail("Container creation should have failed for value mismatch.")
         except exceptions.CosmosHttpResponseError as e:
             assert e.status_code == 400
-            assert "QuantizationByteSize value :: 0 is out of range. The allowed range is between 1 and 256."\
+            assert "The Vector Indexing Policy parameter QuantizationByteSize value :: 0 is out of range. The allowed range is between 1 and 256."\
                    in e.http_error_message
 
         # Pass a vector indexing policy with wrong indexingSearchListSize value
@@ -173,6 +198,85 @@ def test_fail_create_vector_indexing_policy(self):
             assert "IndexingSearchListSize value :: 5 is out of range. The allowed range is between 25 and 500."\
                    in e.http_error_message
 
+        # Pass a vector indexing policy with wrong vectorIndexShardKey value
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["country"]}]
+        }
+        try:
+            self.test_db.create_container(
+                id='vector_container',
+                partition_key=PartitionKey(path="/id"),
+                indexing_policy=indexing_policy,
+                vector_embedding_policy=vector_embedding_policy
+            )
+            pytest.fail("Container creation should have failed for value mismatch.")
+        except exceptions.CosmosHttpResponseError as e:
+            assert e.status_code == 400
+            assert "The Vector Indexing Policy has an invalid Shard Path: country." in e.http_error_message
+
+        # Pass a vector indexing policy with too many shard paths
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["/country", "/city", "/zipcode"]}]
+        }
+        try:
+            self.test_db.create_container(
+                id='vector_container',
+                partition_key=PartitionKey(path="/id"),
+                indexing_policy=indexing_policy,
+                vector_embedding_policy=vector_embedding_policy
+            )
+            pytest.fail("Container creation should have failed for value mismatch.")
+        except exceptions.CosmosHttpResponseError as e:
+            assert e.status_code == 400
+            assert "The number of shard paths defined in the Vector Indexing Policy: 3 exceeds the maximum: 1." \
+                   in e.http_error_message
+
+        # Pass a vector indexing policy with an invalid type for vectorIndexShardKey
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": "/country"}]
+        }
+        try:
+            self.test_db.create_container(
+                id='vector_container',
+                partition_key=PartitionKey(path="/id"),
+                indexing_policy=indexing_policy,
+                vector_embedding_policy=vector_embedding_policy
+            )
+            pytest.fail("Container creation should have failed for value mismatch.")
+        except exceptions.CosmosHttpResponseError as e:
+            assert e.status_code == 400
+            assert "One of the specified inputs is invalid" \
+                   in e.http_error_message
+
+        # Pass a vector indexing policy with dimensions above 512 to test the max range of  "quantizationByteSize"
+        vector_embedding_policy = {
+            "vectorEmbeddings": [
+                {
+                    "path": "/vector1",
+                    "dataType": "float32",
+                    "dimensions": 550,
+                    "distanceFunction": "euclidean"
+                }]}
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector1", "type": "quantizedFlat", "quantizationByteSize": 513 }]
+        }
+        try:
+            self.test_db.create_container(
+                id='vector_container',
+                partition_key=PartitionKey(path="/id"),
+                indexing_policy=indexing_policy,
+                vector_embedding_policy=vector_embedding_policy
+            )
+            pytest.fail("Container creation should have failed for value mismatch.")
+        except exceptions.CosmosHttpResponseError as e:
+            assert e.status_code == 400
+            assert "The Vector Indexing Policy parameter QuantizationByteSize value :: 513 is out of range." \
+                   in e.http_error_message
+
     def test_fail_replace_vector_indexing_policy(self):
         vector_embedding_policy = {
             "vectorEmbeddings": [
diff --git a/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py b/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py
@@ -50,7 +50,9 @@ async def test_create_vector_embedding_container_async(self):
         indexing_policy = {
             "vectorIndexes": [
                 {"path": "/vector1", "type": "flat"},
-                {"path": "/vector2", "type": "quantizedFlat", "quantizationByteSize": 64},
+
+                {"path": "/vector2", "type": "quantizedFlat", "quantizationByteSize": 64, "vectorIndexShardKey": ["/city"]},
+
                 {"path": "/vector3", "type": "diskANN", "quantizationByteSize": 8, "indexingSearchListSize": 50}
             ]
         }
@@ -96,7 +98,15 @@ async def test_fail_create_vector_indexing_policy_async(self):
                     "dataType": "float32",
                     "dimensions": 256,
                     "distanceFunction": "euclidean"
-                }]}
+                },
+                {
+                    "path": "/vector2",
+                    "dataType": "int8",
+                    "dimensions": 200,
+                    "distanceFunction": "dotproduct"
+                }
+            ]
+        }
 
         # Pass a vector indexing policy without embedding policy
         indexing_policy = {
@@ -134,7 +144,7 @@ async def test_fail_create_vector_indexing_policy_async(self):
         # Pass a vector indexing policy with non-matching path
         indexing_policy = {
             "vectorIndexes": [
-                {"path": "/vector2", "type": "flat"}]
+                {"path": "/vector3", "type": "flat"}]
         }
         try:
             await self.test_db.create_container(
@@ -146,7 +156,7 @@ async def test_fail_create_vector_indexing_policy_async(self):
             pytest.fail("Container creation should have failed for index mismatch.")
         except exceptions.CosmosHttpResponseError as e:
             assert e.status_code == 400
-            assert "vector2 not matching in Embedding's path" in e.http_error_message
+            assert "vector3 not matching in Embedding's path" in e.http_error_message
 
         # Pass a vector indexing policy with wrong quantizationByteSize value
         indexing_policy = {
@@ -163,7 +173,7 @@ async def test_fail_create_vector_indexing_policy_async(self):
             pytest.fail("Container creation should have failed for value mismatch.")
         except exceptions.CosmosHttpResponseError as e:
             assert e.status_code == 400
-            assert "QuantizationByteSize value :: 0 is out of range. The allowed range is between 1 and 256." \
+            assert "The Vector Indexing Policy parameter QuantizationByteSize value :: 0 is out of range. The allowed range is between 1 and 256." \
                    in e.http_error_message
 
         # Pass a vector indexing policy with wrong indexingSearchListSize value
@@ -184,6 +194,41 @@ async def test_fail_create_vector_indexing_policy_async(self):
             assert "IndexingSearchListSize value :: 5 is out of range. The allowed range is between 25 and 500." \
                    in e.http_error_message
 
+        # Pass a vector indexing policy with wrong vectorIndexShardKey value
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["country"]}]
+        }
+        try:
+            await self.test_db.create_container(
+                id='vector_container',
+                partition_key=PartitionKey(path="/id"),
+                indexing_policy=indexing_policy,
+                vector_embedding_policy=vector_embedding_policy
+            )
+            pytest.fail("Container creation should have failed for value mismatch.")
+        except exceptions.CosmosHttpResponseError as e:
+            assert e.status_code == 400
+            assert "The Vector Indexing Policy has an invalid Shard Path: country." in e.http_error_message
+
+        # Pass a vector indexing policy with too many shard paths
+        indexing_policy = {
+            "vectorIndexes": [
+                {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["/country", "/city", "/zipcode"]}]
+        }
+        try:
+            await self.test_db.create_container(
+                id='vector_container',
+                partition_key=PartitionKey(path="/id"),
+                indexing_policy=indexing_policy,
+                vector_embedding_policy=vector_embedding_policy
+            )
+            pytest.fail("Container creation should have failed for value mismatch.")
+        except exceptions.CosmosHttpResponseError as e:
+            assert e.status_code == 400
+            assert "The number of shard paths defined in the Vector Indexing Policy: 3 exceeds the maximum: 1." \
+                   in e.http_error_message
+
     async def test_fail_replace_vector_indexing_policy_async(self):
         vector_embedding_policy = {
             "vectorEmbeddings": [

Original file line number	Diff line number	Diff line change
`@@ -689,7 +689,7 @@ def use_vector_embedding_policy(db):`
`689`	`689`	`indexing_policy = {`
`690`	`690`	`"vectorIndexes": [`
`691`	`691`	`{"path": "/vector", "type": "quantizedFlat", "quantizationByteSize": 8},`
`692`		`- {"path": "/vector2", "type": "diskANN", "indexingSearchListSize": 50}`
	`692`	`+ {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["/city"], "indexingSearchListSize": 50}`
`693`	`693`	`]`
`694`	`694`	`}`
`695`	`695`	`vector_embedding_policy = {`
Original file line number	Diff line number	Diff line change
`@@ -684,7 +684,7 @@ async def use_vector_embedding_policy(db):`
`684`	`684`	`indexing_policy = {`
`685`	`685`	`"vectorIndexes": [`
`686`	`686`	`{"path": "/vector", "type": "quantizedFlat", "quantizationByteSize": 8},`
`687`		`- {"path": "/vector2", "type": "diskANN", "indexingSearchListSize": 50}`
	`687`	`+ {"path": "/vector2", "type": "diskANN", "vectorIndexShardKey": ["/city"], "indexingSearchListSize": 50}`
`688`	`688`	`]`
`689`	`689`	`}`
`690`	`690`	`vector_embedding_policy = {`