Skip to content

Commit 4ca8bbb

Browse files
authored
[Cosmos] fix: add ability to replace indexing policies with vector indexes (#42810)
* changes
* more tests, changelog
* nits
* Update CHANGELOG.md
1 parent ff3e1ff commit 4ca8bbb

File tree

5 files changed

+252
-5
lines changed

5 files changed

+252
-5
lines changed

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#### Features Added
66
* Added read_items API to provide an efficient method for retrieving multiple items in a single request. See [PR 42167](https://github.com/Azure/azure-sdk-for-python/pull/42167).
7+
* Added the ability to replace the indexing policy of a container that has a vector embedding policy, by passing the existing `vector_embedding_policy` to `replace_container`. See [PR 42810](https://github.com/Azure/azure-sdk-for-python/pull/42810).
78

89
#### Breaking Changes
910

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,7 @@ async def replace_container(
519519
analytical_storage_ttl: Optional[int] = None,
520520
computed_properties: Optional[List[Dict[str, str]]] = None,
521521
full_text_policy: Optional[Dict[str, Any]] = None,
522+
vector_embedding_policy: Optional[Dict[str, Any]] = None,
522523
**kwargs: Any
523524
) -> ContainerProxy:
524525
"""Reset the properties of the container.
@@ -596,7 +597,8 @@ async def replace_container(
596597
"conflictResolutionPolicy": conflict_resolution_policy,
597598
"analyticalStorageTtl": analytical_storage_ttl,
598599
"computedProperties": computed_properties,
599-
"fullTextPolicy": full_text_policy
600+
"fullTextPolicy": full_text_policy,
601+
"vectorEmbeddingPolicy": vector_embedding_policy
600602
}.items()
601603
if value is not None
602604
}

sdk/cosmos/azure-cosmos/azure/cosmos/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ def replace_container( # pylint:disable=docstring-missing-param
584584
analytical_storage_ttl: Optional[int] = None,
585585
computed_properties: Optional[List[Dict[str, str]]] = None,
586586
full_text_policy: Optional[Dict[str, Any]] = None,
587+
vector_embedding_policy: Optional[Dict[str, Any]] = None,
587588
**kwargs: Any
588589
) -> ContainerProxy:
589590
"""Reset the properties of the container.
@@ -665,6 +666,7 @@ def replace_container( # pylint:disable=docstring-missing-param
665666
"analyticalStorageTtl": analytical_storage_ttl,
666667
"computedProperties": computed_properties,
667668
"fullTextPolicy": full_text_policy,
669+
"vectorEmbeddingPolicy": vector_embedding_policy
668670
}.items()
669671
if value is not None
670672
}

sdk/cosmos/azure-cosmos/tests/test_vector_policy.py

Lines changed: 123 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,84 @@ def test_create_vector_embedding_container(self):
9494
assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"]
9595
self.test_db.delete_container(container_id)
9696

97+
def test_replace_vector_indexing_policy(self):
98+
# Replace should work so long as the new indexing policy doesn't change the vector indexes, and as long as
99+
# the previously defined vector embedding policy is also provided.
100+
vector_embedding_policy = {
101+
"vectorEmbeddings": [
102+
{
103+
"path": "/vector1",
104+
"dataType": "float32",
105+
"dimensions": 256,
106+
"distanceFunction": "euclidean"
107+
}
108+
]
109+
}
110+
indexing_policy = {
111+
"indexingMode": "consistent",
112+
"automatic": True,
113+
"includedPaths": [
114+
{
115+
"path": "/*"
116+
}
117+
],
118+
"excludedPaths": [
119+
{
120+
"path": "/vector1/*"
121+
},
122+
{
123+
"path": "/\"_etag\"/?"
124+
}
125+
],
126+
"fullTextIndexes": [],
127+
"vectorIndexes": [
128+
{
129+
"path": "/vector1",
130+
"type": "diskANN",
131+
"quantizationByteSize": 128,
132+
"indexingSearchListSize": 100
133+
}
134+
]
135+
}
136+
container_id = "vector_container" + str(uuid.uuid4())
137+
created_container = self.test_db.create_container(
138+
id=container_id,
139+
partition_key=PartitionKey(path="/id"),
140+
indexing_policy=indexing_policy,
141+
vector_embedding_policy=vector_embedding_policy
142+
)
143+
new_indexing_policy = {
144+
"indexingMode": "consistent",
145+
"automatic": True,
146+
"includedPaths": [
147+
{"path": "/color/?"},
148+
{"path": "/description/?"},
149+
{"path": "/cost/?"}
150+
],
151+
"excludedPaths": [
152+
{"path": "/*"},
153+
{"path": "/vector1/*"},
154+
{"path": "/\"_etag\"/?"}
155+
],
156+
"fullTextIndexes": [],
157+
"vectorIndexes": [
158+
{
159+
"path": "/vector1",
160+
"type": "diskANN",
161+
"quantizationByteSize": 128,
162+
"indexingSearchListSize": 100
163+
}]
164+
}
165+
self.test_db.replace_container(
166+
created_container,
167+
PartitionKey(path="/id"),
168+
vector_embedding_policy=vector_embedding_policy,
169+
indexing_policy=new_indexing_policy)
170+
properties = created_container.read()
171+
assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy
172+
assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"]
173+
self.test_db.delete_container(container_id)
174+
97175
def test_fail_create_vector_indexing_policy(self):
98176
vector_embedding_policy = {
99177
"vectorEmbeddings": [
@@ -297,6 +375,29 @@ def test_fail_replace_vector_indexing_policy(self):
297375
indexing_policy=indexing_policy,
298376
vector_embedding_policy=vector_embedding_policy
299377
)
378+
# don't provide vector embedding policy
379+
try:
380+
self.test_db.replace_container(
381+
created_container,
382+
PartitionKey(path="/id"),
383+
indexing_policy=indexing_policy)
384+
pytest.fail("Container replace should have failed for missing embedding policy.")
385+
except exceptions.CosmosHttpResponseError as e:
386+
assert e.status_code == 400
387+
assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path."
388+
in e.http_error_message)
389+
# don't provide vector indexing policy
390+
try:
391+
self.test_db.replace_container(
392+
created_container,
393+
PartitionKey(path="/id"),
394+
vector_embedding_policy=vector_embedding_policy)
395+
pytest.fail("Container replace should have failed for missing indexing policy.")
396+
except exceptions.CosmosHttpResponseError as e:
397+
assert e.status_code == 400
398+
assert ("The Vector Indexing Policy cannot be changed in Collection Replace."
399+
in e.http_error_message)
400+
# using a new indexing policy
300401
new_indexing_policy = {
301402
"vectorIndexes": [
302403
{"path": "/vector1", "type": "quantizedFlat"}]
@@ -305,11 +406,31 @@ def test_fail_replace_vector_indexing_policy(self):
305406
self.test_db.replace_container(
306407
created_container,
307408
PartitionKey(path="/id"),
409+
vector_embedding_policy=vector_embedding_policy,
308410
indexing_policy=new_indexing_policy)
309-
pytest.fail("Container replace should have failed for indexing policy.")
411+
pytest.fail("Container replace should have failed for new indexing policy.")
310412
except exceptions.CosmosHttpResponseError as e:
311413
assert e.status_code == 400
312-
assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path."
414+
assert ("Paths in existing vector indexing policy cannot be modified in Collection Replace"
415+
in e.http_error_message)
416+
# using a new vector embedding policy
417+
new_embedding_policy = {
418+
"vectorEmbeddings": [
419+
{
420+
"path": "/vector1",
421+
"dataType": "float32",
422+
"dimensions": 384,
423+
"distanceFunction": "euclidean"}]}
424+
try:
425+
self.test_db.replace_container(
426+
created_container,
427+
PartitionKey(path="/id"),
428+
vector_embedding_policy=new_embedding_policy,
429+
indexing_policy=indexing_policy)
430+
pytest.fail("Container replace should have failed for new embedding policy.")
431+
except exceptions.CosmosHttpResponseError as e:
432+
assert e.status_code == 400
433+
assert ("The Vector Embedding Policy cannot be changed in Collection Replace"
313434
in e.http_error_message)
314435
self.test_db.delete_container(container_id)
315436

sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py

Lines changed: 123 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,84 @@ async def test_create_vector_embedding_container_async(self):
9090
assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"]
9191
await self.test_db.delete_container(container_id)
9292

93+
async def test_replace_vector_indexing_policy_async(self):
94+
# Replace should work so long as the new indexing policy doesn't change the vector indexes, and as long as
95+
# the previously defined vector embedding policy is also provided.
96+
vector_embedding_policy = {
97+
"vectorEmbeddings": [
98+
{
99+
"path": "/vector1",
100+
"dataType": "float32",
101+
"dimensions": 256,
102+
"distanceFunction": "euclidean"
103+
}
104+
]
105+
}
106+
indexing_policy = {
107+
"indexingMode": "consistent",
108+
"automatic": True,
109+
"includedPaths": [
110+
{
111+
"path": "/*"
112+
}
113+
],
114+
"excludedPaths": [
115+
{
116+
"path": "/vector1/*"
117+
},
118+
{
119+
"path": "/\"_etag\"/?"
120+
}
121+
],
122+
"fullTextIndexes": [],
123+
"vectorIndexes": [
124+
{
125+
"path": "/vector1",
126+
"type": "diskANN",
127+
"quantizationByteSize": 128,
128+
"indexingSearchListSize": 100
129+
}
130+
]
131+
}
132+
container_id = "vector_container" + str(uuid.uuid4())
133+
created_container = await self.test_db.create_container(
134+
id=container_id,
135+
partition_key=PartitionKey(path="/id"),
136+
indexing_policy=indexing_policy,
137+
vector_embedding_policy=vector_embedding_policy
138+
)
139+
new_indexing_policy = {
140+
"indexingMode": "consistent",
141+
"automatic": True,
142+
"includedPaths": [
143+
{"path": "/color/?"},
144+
{"path": "/description/?"},
145+
{"path": "/cost/?"}
146+
],
147+
"excludedPaths": [
148+
{"path": "/*"},
149+
{"path": "/vector1/*"},
150+
{"path": "/\"_etag\"/?"}
151+
],
152+
"fullTextIndexes": [],
153+
"vectorIndexes": [
154+
{
155+
"path": "/vector1",
156+
"type": "diskANN",
157+
"quantizationByteSize": 128,
158+
"indexingSearchListSize": 100
159+
}]
160+
}
161+
await self.test_db.replace_container(
162+
created_container,
163+
PartitionKey(path="/id"),
164+
vector_embedding_policy=vector_embedding_policy,
165+
indexing_policy=new_indexing_policy)
166+
properties = await created_container.read()
167+
assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy
168+
assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"]
169+
await self.test_db.delete_container(container_id)
170+
93171
async def test_fail_create_vector_indexing_policy_async(self):
94172
vector_embedding_policy = {
95173
"vectorEmbeddings": [
@@ -249,6 +327,29 @@ async def test_fail_replace_vector_indexing_policy_async(self):
249327
indexing_policy=indexing_policy,
250328
vector_embedding_policy=vector_embedding_policy
251329
)
330+
# don't provide vector embedding policy
331+
try:
332+
await self.test_db.replace_container(
333+
created_container,
334+
PartitionKey(path="/id"),
335+
indexing_policy=indexing_policy)
336+
pytest.fail("Container replace should have failed for missing embedding policy.")
337+
except exceptions.CosmosHttpResponseError as e:
338+
assert e.status_code == 400
339+
assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path."
340+
in e.http_error_message)
341+
# don't provide vector indexing policy
342+
try:
343+
await self.test_db.replace_container(
344+
created_container,
345+
PartitionKey(path="/id"),
346+
vector_embedding_policy=vector_embedding_policy)
347+
pytest.fail("Container replace should have failed for missing indexing policy.")
348+
except exceptions.CosmosHttpResponseError as e:
349+
assert e.status_code == 400
350+
assert ("The Vector Indexing Policy cannot be changed in Collection Replace."
351+
in e.http_error_message)
352+
# using a new indexing policy
252353
new_indexing_policy = {
253354
"vectorIndexes": [
254355
{"path": "/vector1", "type": "quantizedFlat"}]
@@ -257,11 +358,31 @@ async def test_fail_replace_vector_indexing_policy_async(self):
257358
await self.test_db.replace_container(
258359
created_container,
259360
PartitionKey(path="/id"),
361+
vector_embedding_policy=vector_embedding_policy,
260362
indexing_policy=new_indexing_policy)
261-
pytest.fail("Container replace should have failed for indexing policy.")
363+
pytest.fail("Container replace should have failed for new indexing policy.")
262364
except exceptions.CosmosHttpResponseError as e:
263365
assert e.status_code == 400
264-
assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path."
366+
assert ("Paths in existing vector indexing policy cannot be modified in Collection Replace"
367+
in e.http_error_message)
368+
# using a new vector embedding policy
369+
new_embedding_policy = {
370+
"vectorEmbeddings": [
371+
{
372+
"path": "/vector1",
373+
"dataType": "float32",
374+
"dimensions": 384,
375+
"distanceFunction": "euclidean"}]}
376+
try:
377+
await self.test_db.replace_container(
378+
created_container,
379+
PartitionKey(path="/id"),
380+
vector_embedding_policy=new_embedding_policy,
381+
indexing_policy=indexing_policy)
382+
pytest.fail("Container replace should have failed for new embedding policy.")
383+
except exceptions.CosmosHttpResponseError as e:
384+
assert e.status_code == 400
385+
assert ("The Vector Embedding Policy cannot be changed in Collection Replace"
265386
in e.http_error_message)
266387
await self.test_db.delete_container(container_id)
267388

0 commit comments

Comments
 (0)