Skip to content

Commit 35f9733

Browse files
Consolidate Container Properties cache in Client Instance (#35293)
* Add client cache to hold container properties This adds a client level cache for container properties. * Added tests for container properties cache * Update _cosmos_client_connection_async.py * Update _cosmos_client_connection_async.py * Fix typos * Update container.py * Update _container.py * Update cache test * Update test_container_properties_cache.py * update tests * Update test_container_properties_cache.py * Update tests updated tests that were compensating for extra container reads, the extra reads no longer happen with adding the container cache properties in the client. * tests updates * Mypy fixes * mypy fix * Update * Cache update Fixed some reccomnded changes, and added optimizations for the cache. Only including needed properties info that will be unlikely to change. * test update * Update CHANGELOG.md * added set_properties_cache method Added a method in base.py to set the properties cache so we only have to change one location in case in the future we want to cache additional properties * Update _base.py * Update _cosmos_client_connection.py * MyPy fixes * Fix mypy issue * Update _cosmos_client_connection.py * Update _cosmos_client_connection.py * Update _cosmos_client_connection_async.py * Update _cosmos_client_connection.py * Update container cache Changed container cache to be a private attribute. Also added tests to verify the contents of the cache to be correct ones. * Update sdk/cosmos/azure-cosmos/CHANGELOG.md Co-authored-by: Kushagra Thapar <[email protected]> * Additional pylint updates * more pylint fixes * pylint fix * Update test_container_properties_cache.py --------- Co-authored-by: Kushagra Thapar <[email protected]>
1 parent d05603b commit 35f9733

13 files changed

+267
-53
lines changed

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#### Bugs Fixed
1111
* Fixed a bug where change feed query in Async client was not returning all pages due to case-sensitive response headers. See [PR 35090](https://github.com/Azure/azure-sdk-for-python/pull/35090)
1212
* Fixed a bug when a retryable exception occurs in the first page of a query execution causing query to return 0 results. See [PR 35090](https://github.com/Azure/azure-sdk-for-python/pull/35090).
13+
* Consolidated Container Properties Cache to be in the Client to cache partition key definition and container rid to avoid unnecessary container reads. See [PR 35293](https://github.com/Azure/azure-sdk-for-python/pull/35293)
1314

1415

1516
#### Other Changes

sdk/cosmos/azure-cosmos/azure/cosmos/_base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,3 +853,10 @@ def _format_batch_operations(
853853
final_operations.append(operation)
854854

855855
return final_operations
856+
857+
858+
def _set_properties_cache(properties: Dict[str, Any]) -> Dict[str, Any]:
859+
return {
860+
"_self": properties.get("_self", None), "_rid": properties.get("_rid", None),
861+
"partitionKey": properties.get("partitionKey", None)
862+
}

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
)
4646

4747
from . import _base as base
48+
from ._base import _set_properties_cache
4849
from . import documents
4950
from .documents import ConnectionPolicy, DatabaseAccount
5051
from ._constants import _Constants as Constants
@@ -143,7 +144,7 @@ def __init__(
143144

144145
self.connection_policy = connection_policy or ConnectionPolicy()
145146
self.partition_resolvers: Dict[str, RangePartitionResolver] = {}
146-
self.partition_key_definition_cache: Dict[str, Any] = {}
147+
self.__container_properties_cache: Dict[str, Dict[str, Any]] = {}
147148
self.default_headers: Dict[str, Any] = {
148149
http_constants.HttpHeaders.CacheControl: "no-cache",
149150
http_constants.HttpHeaders.Version: http_constants.Versions.CurrentVersion,
@@ -230,6 +231,26 @@ def __init__(
230231
self.session: Optional[_session.Session] = None
231232
self._set_client_consistency_level(database_account, consistency_level)
232233

234+
@property
235+
def _container_properties_cache(self) -> Dict[str, Dict[str, Any]]:
236+
"""Gets the container properties cache from the client.
237+
:returns: the container properties cache for the client.
238+
:rtype: Dict[str, Dict[str, Any]]"""
239+
return self.__container_properties_cache
240+
241+
def _set_container_properties_cache(self, container_link: str, properties: Optional[Dict[str, Any]]) -> None:
242+
"""Sets the container properties cache for the specified container.
243+
244+
This will only update the properties cache for a specified container.
245+
:param container_link: The container link will be used as the key to cache the container properties.
246+
:type container_link: str
247+
:param properties: These are the container properties to cache.
248+
:type properties: Optional[Dict[str, Any]]"""
249+
if properties:
250+
self.__container_properties_cache[container_link] = properties
251+
else:
252+
self.__container_properties_cache[container_link] = {}
253+
233254
def _set_client_consistency_level(
234255
self,
235256
database_account: DatabaseAccount,
@@ -1261,7 +1282,6 @@ def CreateItem(
12611282

12621283
if base.IsItemContainerLink(database_or_container_link):
12631284
options = self._AddPartitionKey(database_or_container_link, document, options)
1264-
12651285
return self.Create(document, path, "docs", collection_id, None, options, **kwargs)
12661286

12671287
def UpsertItem(
@@ -3270,13 +3290,14 @@ def _UpdateSessionIfRequired(
32703290
self.session.update_session(response_result, response_headers)
32713291

32723292
def _get_partition_key_definition(self, collection_link: str) -> Optional[Dict[str, Any]]:
3273-
partition_key_definition = None
3293+
partition_key_definition: Optional[Dict[str, Any]]
32743294
# If the document collection link is present in the cache, then use the cached partitionkey definition
3275-
if collection_link in self.partition_key_definition_cache:
3276-
partition_key_definition = self.partition_key_definition_cache.get(collection_link)
3295+
if collection_link in self.__container_properties_cache:
3296+
cached_container: Dict[str, Any] = self.__container_properties_cache.get(collection_link, {})
3297+
partition_key_definition = cached_container.get("partitionKey")
32773298
# Else read the collection from backend and add it to the cache
32783299
else:
3279-
collection = self.ReadContainer(collection_link)
3280-
partition_key_definition = collection.get("partitionKey")
3281-
self.partition_key_definition_cache[collection_link] = partition_key_definition
3300+
container = self.ReadContainer(collection_link)
3301+
partition_key_definition = container.get("partitionKey")
3302+
self.__container_properties_cache[collection_link] = _set_properties_cache(container)
32823303
return partition_key_definition

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
validate_cache_staleness_value,
3737
_deserialize_throughput,
3838
_replace_throughput,
39-
GenerateGuidId
39+
GenerateGuidId,
40+
_set_properties_cache
4041
)
4142
from ..exceptions import CosmosResourceNotFoundError
4243
from ..http_constants import StatusCodes
@@ -81,19 +82,21 @@ def __init__(
8182
) -> None:
8283
self.client_connection = client_connection
8384
self.id = id
84-
self._properties = properties
8585
self.database_link = database_link
8686
self.container_link = "{}/colls/{}".format(database_link, self.id)
8787
self._is_system_key: Optional[bool] = None
8888
self._scripts: Optional[ScriptsProxy] = None
89+
if properties:
90+
self.client_connection._set_container_properties_cache(self.container_link,
91+
_set_properties_cache(properties)) # pylint: disable=protected-access, line-too-long
8992

9093
def __repr__(self) -> str:
9194
return "<ContainerProxy [{}]>".format(self.container_link)[:1024]
9295

9396
async def _get_properties(self) -> Dict[str, Any]:
94-
if self._properties is None:
95-
self._properties = await self.read()
96-
return self._properties
97+
if self.container_link not in self.client_connection._container_properties_cache: # pylint: disable=protected-access, line-too-long
98+
await self.read()
99+
return self.client_connection._container_properties_cache[self.container_link] # pylint: disable=protected-access, line-too-long
97100

98101
@property
99102
async def is_system_key(self) -> bool:
@@ -167,12 +170,10 @@ async def read(
167170
request_options["populatePartitionKeyRangeStatistics"] = populate_partition_key_range_statistics
168171
if populate_quota_info is not None:
169172
request_options["populateQuotaInfo"] = populate_quota_info
170-
171-
collection_link = self.container_link
172-
self._properties = await self.client_connection.ReadContainer(
173-
collection_link, options=request_options, **kwargs
174-
)
175-
return self._properties
173+
container = await self.client_connection.ReadContainer(self.container_link, options=request_options, **kwargs)
174+
# Only cache Container Properties that will not change in the lifetime of the container
175+
self.client_connection._set_container_properties_cache(self.container_link, _set_properties_cache(container)) # pylint: disable=protected-access, line-too-long
176+
return container
176177

177178
@distributed_trace_async
178179
async def create_item(

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
ProxyPolicy)
4848

4949
from .. import _base as base
50+
from .._base import _set_properties_cache
5051
from .. import documents
5152
from .._routing import routing_range
5253
from ..documents import ConnectionPolicy, DatabaseAccount
@@ -144,7 +145,7 @@ def __init__(
144145

145146
self.connection_policy = connection_policy or ConnectionPolicy()
146147
self.partition_resolvers: Dict[str, RangePartitionResolver] = {}
147-
self.partition_key_definition_cache: Dict[str, Any] = {}
148+
self.__container_properties_cache: Dict[str, Dict[str, Any]] = {}
148149
self.default_headers: Dict[str, Any] = {
149150
http_constants.HttpHeaders.CacheControl: "no-cache",
150151
http_constants.HttpHeaders.Version: http_constants.Versions.CurrentVersion,
@@ -226,6 +227,26 @@ def __init__(
226227
# Routing map provider
227228
self._routing_map_provider: SmartRoutingMapProvider = SmartRoutingMapProvider(self)
228229

230+
@property
231+
def _container_properties_cache(self) -> Dict[str, Dict[str, Any]]:
232+
"""Gets the container properties cache from the client.
233+
:returns: the container properties cache for the client.
234+
:rtype: Dict[str, Dict[str, Any]]"""
235+
return self.__container_properties_cache
236+
237+
def _set_container_properties_cache(self, container_link: str, properties: Optional[Dict[str, Any]]) -> None:
238+
"""Sets the container properties cache for the specified container.
239+
240+
This will only update the properties cache for a specified container.
241+
:param container_link: The container link will be used as the key to cache the container properties.
242+
:type container_link: str
243+
:param properties: These are the container properties to cache.
244+
:type properties: Optional[Dict[str, Any]]"""
245+
if properties:
246+
self.__container_properties_cache[container_link] = properties
247+
else:
248+
self.__container_properties_cache[container_link] = {}
249+
229250
@property
230251
def _Session(self) -> Optional[_session.Session]:
231252
"""Gets the session object from the client.
@@ -3034,13 +3055,14 @@ async def _AddPartitionKey(self, collection_link, document, options):
30343055
# TODO: Refresh the cache if partition is extracted automatically and we get a 400.1001
30353056

30363057
# If the document collection link is present in the cache, then use the cached partitionkey definition
3037-
if collection_link in self.partition_key_definition_cache:
3038-
partitionKeyDefinition = self.partition_key_definition_cache.get(collection_link)
3058+
if collection_link in self.__container_properties_cache:
3059+
cached_container = self.__container_properties_cache.get(collection_link)
3060+
partitionKeyDefinition = cached_container.get("partitionKey")
30393061
# Else read the collection from backend and add it to the cache
30403062
else:
3041-
collection = await self.ReadContainer(collection_link)
3042-
partitionKeyDefinition = collection.get("partitionKey")
3043-
self.partition_key_definition_cache[collection_link] = partitionKeyDefinition
3063+
container = await self.ReadContainer(collection_link)
3064+
partitionKeyDefinition = container.get("partitionKey")
3065+
self.__container_properties_cache[collection_link] = _set_properties_cache(container)
30443066

30453067
# If the collection doesn't have a partition key definition, skip it as it's a legacy collection
30463068
if partitionKeyDefinition:

sdk/cosmos/azure-cosmos/azure/cosmos/container.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
validate_cache_staleness_value,
3737
_deserialize_throughput,
3838
_replace_throughput,
39-
GenerateGuidId
39+
GenerateGuidId,
40+
_set_properties_cache
4041
)
4142
from .exceptions import CosmosResourceNotFoundError
4243
from .http_constants import StatusCodes
@@ -83,17 +84,19 @@ def __init__(
8384
self.id = id
8485
self.container_link = "{}/colls/{}".format(database_link, self.id)
8586
self.client_connection = client_connection
86-
self._properties = properties
8787
self._is_system_key: Optional[bool] = None
8888
self._scripts: Optional[ScriptsProxy] = None
89+
if properties:
90+
self.client_connection._set_container_properties_cache(self.container_link,
91+
_set_properties_cache(properties)) # pylint: disable=protected-access, line-too-long
8992

9093
def __repr__(self) -> str:
9194
return "<ContainerProxy [{}]>".format(self.container_link)[:1024]
9295

9396
def _get_properties(self) -> Dict[str, Any]:
94-
if self._properties is None:
95-
self._properties = self.read()
96-
return self._properties
97+
if self.container_link not in self.client_connection._container_properties_cache: # pylint: disable=protected-access, line-too-long
98+
self.read()
99+
return self.client_connection._container_properties_cache[self.container_link] # pylint: disable=protected-access, line-too-long
97100

98101
@property
99102
def is_system_key(self) -> bool:
@@ -173,11 +176,10 @@ def read( # pylint:disable=docstring-missing-param
173176
request_options["populatePartitionKeyRangeStatistics"] = populate_partition_key_range_statistics
174177
if populate_quota_info is not None:
175178
request_options["populateQuotaInfo"] = populate_quota_info
176-
collection_link = self.container_link
177-
self._properties = self.client_connection.ReadContainer(
178-
collection_link, options=request_options, **kwargs
179-
)
180-
return self._properties
179+
container = self.client_connection.ReadContainer(self.container_link, options=request_options, **kwargs)
180+
# Only cache Container Properties that will not change in the lifetime of the container
181+
self.client_connection._set_container_properties_cache(self.container_link, _set_properties_cache(container)) # pylint: disable=protected-access, line-too-long
182+
return container
181183

182184
@distributed_trace
183185
def read_item( # pylint:disable=docstring-missing-param
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# -*- coding: utf-8 -*-
2+
# The MIT License (MIT)
3+
# Copyright (c) Microsoft Corporation. All rights reserved.
4+
5+
"""End-to-end test.
6+
"""
7+
8+
import unittest
9+
import uuid
10+
11+
import pytest
12+
13+
import azure.cosmos.cosmos_client as cosmos_client
14+
import azure.cosmos.exceptions as exceptions
15+
import test_config
16+
from azure.cosmos.partition_key import PartitionKey
17+
18+
19+
@pytest.mark.cosmosEmulator
20+
class TestContainerPropertiesCache(unittest.TestCase):
21+
"""Python CRUD Tests.
22+
"""
23+
24+
configs = test_config.TestConfig
25+
host = configs.host
26+
masterKey = configs.masterKey
27+
connectionPolicy = configs.connectionPolicy
28+
last_headers = []
29+
client: cosmos_client.CosmosClient = None
30+
31+
@classmethod
32+
def setUpClass(cls):
33+
if (cls.masterKey == '[YOUR_KEY_HERE]' or
34+
cls.host == '[YOUR_ENDPOINT_HERE]'):
35+
raise Exception(
36+
"You must specify your Azure Cosmos account values for "
37+
"'masterKey' and 'host' at the top of this class to run the "
38+
"tests.")
39+
cls.client = cosmos_client.CosmosClient(cls.host, cls.masterKey)
40+
cls.databaseForTest = cls.client.get_database_client(cls.configs.TEST_DATABASE_ID)
41+
42+
def test_container_properties_cache(self):
43+
client = self.client
44+
database_name = "Container Properties Cache Test DB " + str(uuid.uuid4())
45+
created_db = client.create_database(database_name)
46+
container_name = str(uuid.uuid4())
47+
container_pk = "PK"
48+
# Create The Container
49+
try:
50+
client.get_database_client(database_name).create_container(id=container_name, partition_key=PartitionKey(
51+
path="/" + container_pk))
52+
except exceptions.CosmosResourceExistsError:
53+
pass
54+
# Delete The cache as this is meant to test calling operations on a preexisting container
55+
# and not a freshly made one. It's a private attribute so use mangled name.
56+
client.client_connection._CosmosClientConnection__container_properties_cache = {}
57+
# We will hot path operations to verify cache persists
58+
# This will extract partition key from the item body, which will need partition key definition from
59+
# container properties. We test to check the cache is empty since we just created the container
60+
self.assertTrue(client.client_connection._container_properties_cache == {})
61+
client.get_database_client(database_name).get_container_client(container_name).create_item(
62+
body={'id': 'item1', container_pk: 'value'})
63+
# Since the cache was empty, it should have called a container read to get properties. So now Cache should
64+
# be populated and available even when we don't have a container instance
65+
self.assertTrue(client.client_connection._container_properties_cache != {})
66+
# We can test if the cache properties are correct by comparing them to a fresh read.
67+
# First lets save the old cache values
68+
cached_properties = created_db.get_container_client(container_name)._get_properties()
69+
# Get the container dictionary out of a fresh container read
70+
fresh_container_read = created_db.get_container_client(container_name).read()
71+
# Now we can compare the RID and Partition Key Definition
72+
self.assertEqual(cached_properties.get("_rid"), fresh_container_read.get("_rid"))
73+
self.assertEqual(cached_properties.get("partitionKey"), fresh_container_read.get("partitionKey"))
74+
client.delete_database(created_db)
75+
76+
77+
if __name__ == '__main__':
78+
try:
79+
unittest.main()
80+
except SystemExit as inst:
81+
if inst.args[0] is True: # raised by sys.exit(True) when tests failed
82+
raise

0 commit comments

Comments
 (0)