Skip to content

Commit 8bc01f5

Browse files
Container properties cache consolidation (#35731)
* Add client cache to hold container properties This adds a client level cache for container properties. * Added tests for container properties cache * Update _cosmos_client_connection_async.py * Update _cosmos_client_connection_async.py * Fix typos * Update container.py * Update _container.py * Update cache test * Update test_container_properties_cache.py * update tests * Update test_container_properties_cache.py * Update tests updated tests that were compensating for extra container reads, the extra reads no longer happen with adding the container cache properties in the client. * tests updates * Mypy fixes * mypy fix * Update * Cache update Fixed some reccomnded changes, and added optimizations for the cache. Only including needed properties info that will be unlikely to change. * test update * Update CHANGELOG.md * added set_properties_cache method Added a method in base.py to set the properties cache so we only have to change one location in case in the future we want to cache additional properties * Update _base.py * Update _cosmos_client_connection.py * MyPy fixes * Fix mypy issue * Update _cosmos_client_connection.py * Update _cosmos_client_connection.py * Update _cosmos_client_connection_async.py * Update _cosmos_client_connection.py * Update container cache Changed container cache to be a private attribute. Also added tests to verify the contents of the cache to be correct ones. * Update sdk/cosmos/azure-cosmos/CHANGELOG.md Co-authored-by: Kushagra Thapar <[email protected]> * Additional pylint updates * more pylint fixes * pylint fix * Update test_container_properties_cache.py * Container Cache updates Fixed broken tests that used old properties cache. Added samples to show how new properties cache works and best way to get the container properties * Update CHANGELOG.md --------- Co-authored-by: Kushagra Thapar <[email protected]>
1 parent 83c18b8 commit 8bc01f5

17 files changed

+347
-75
lines changed

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#### Breaking Changes
88

99
#### Bugs Fixed
10+
* Consolidated Container Properties Cache to be in the Client to cache partition key definition and container rid to avoid unnecessary container reads. See [PR 35731](https://github.com/Azure/azure-sdk-for-python/pull/35731)
1011

1112
#### Other Changes
1213

sdk/cosmos/azure-cosmos/azure/cosmos/_base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,3 +853,10 @@ def _format_batch_operations(
853853
final_operations.append(operation)
854854

855855
return final_operations
856+
857+
858+
def _set_properties_cache(properties: Dict[str, Any]) -> Dict[str, Any]:
859+
return {
860+
"_self": properties.get("_self", None), "_rid": properties.get("_rid", None),
861+
"partitionKey": properties.get("partitionKey", None)
862+
}

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
)
4747

4848
from . import _base as base
49+
from ._base import _set_properties_cache
4950
from . import documents
5051
from .documents import ConnectionPolicy, DatabaseAccount
5152
from ._constants import _Constants as Constants
@@ -144,7 +145,7 @@ def __init__(
144145

145146
self.connection_policy = connection_policy or ConnectionPolicy()
146147
self.partition_resolvers: Dict[str, RangePartitionResolver] = {}
147-
self.partition_key_definition_cache: Dict[str, Any] = {}
148+
self.__container_properties_cache: Dict[str, Dict[str, Any]] = {}
148149
self.default_headers: Dict[str, Any] = {
149150
http_constants.HttpHeaders.CacheControl: "no-cache",
150151
http_constants.HttpHeaders.Version: http_constants.Versions.CurrentVersion,
@@ -231,6 +232,26 @@ def __init__(
231232
self.session: Optional[_session.Session] = None
232233
self._set_client_consistency_level(database_account, consistency_level)
233234

235+
@property
236+
def _container_properties_cache(self) -> Dict[str, Dict[str, Any]]:
237+
"""Gets the container properties cache from the client.
238+
:returns: the container properties cache for the client.
239+
:rtype: Dict[str, Dict[str, Any]]"""
240+
return self.__container_properties_cache
241+
242+
def _set_container_properties_cache(self, container_link: str, properties: Optional[Dict[str, Any]]) -> None:
243+
"""Sets the container properties cache for the specified container.
244+
245+
This will only update the properties cache for a specified container.
246+
:param container_link: The container link will be used as the key to cache the container properties.
247+
:type container_link: str
248+
:param properties: These are the container properties to cache.
249+
:type properties: Optional[Dict[str, Any]]"""
250+
if properties:
251+
self.__container_properties_cache[container_link] = properties
252+
else:
253+
self.__container_properties_cache[container_link] = {}
254+
234255
def _set_client_consistency_level(
235256
self,
236257
database_account: DatabaseAccount,
@@ -1262,7 +1283,6 @@ def CreateItem(
12621283

12631284
if base.IsItemContainerLink(database_or_container_link):
12641285
options = self._AddPartitionKey(database_or_container_link, document, options)
1265-
12661286
return self.Create(document, path, "docs", collection_id, None, options, **kwargs)
12671287

12681288
def UpsertItem(
@@ -3280,13 +3300,14 @@ def _UpdateSessionIfRequired(
32803300
self.session.update_session(response_result, response_headers)
32813301

32823302
def _get_partition_key_definition(self, collection_link: str) -> Optional[Dict[str, Any]]:
3283-
partition_key_definition = None
3303+
partition_key_definition: Optional[Dict[str, Any]]
32843304
# If the document collection link is present in the cache, then use the cached partitionkey definition
3285-
if collection_link in self.partition_key_definition_cache:
3286-
partition_key_definition = self.partition_key_definition_cache.get(collection_link)
3305+
if collection_link in self.__container_properties_cache:
3306+
cached_container: Dict[str, Any] = self.__container_properties_cache.get(collection_link, {})
3307+
partition_key_definition = cached_container.get("partitionKey")
32873308
# Else read the collection from backend and add it to the cache
32883309
else:
3289-
collection = self.ReadContainer(collection_link)
3290-
partition_key_definition = collection.get("partitionKey")
3291-
self.partition_key_definition_cache[collection_link] = partition_key_definition
3310+
container = self.ReadContainer(collection_link)
3311+
partition_key_definition = container.get("partitionKey")
3312+
self.__container_properties_cache[collection_link] = _set_properties_cache(container)
32923313
return partition_key_definition

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
validate_cache_staleness_value,
3737
_deserialize_throughput,
3838
_replace_throughput,
39-
GenerateGuidId
39+
GenerateGuidId,
40+
_set_properties_cache
4041
)
4142
from ..exceptions import CosmosResourceNotFoundError
4243
from ..http_constants import StatusCodes
@@ -81,19 +82,21 @@ def __init__(
8182
) -> None:
8283
self.client_connection = client_connection
8384
self.id = id
84-
self._properties = properties
8585
self.database_link = database_link
8686
self.container_link = "{}/colls/{}".format(database_link, self.id)
8787
self._is_system_key: Optional[bool] = None
8888
self._scripts: Optional[ScriptsProxy] = None
89+
if properties:
90+
self.client_connection._set_container_properties_cache(self.container_link,
91+
_set_properties_cache(properties)) # pylint: disable=protected-access, line-too-long
8992

9093
def __repr__(self) -> str:
9194
return "<ContainerProxy [{}]>".format(self.container_link)[:1024]
9295

9396
async def _get_properties(self) -> Dict[str, Any]:
94-
if self._properties is None:
95-
self._properties = await self.read()
96-
return self._properties
97+
if self.container_link not in self.client_connection._container_properties_cache: # pylint: disable=protected-access, line-too-long
98+
await self.read()
99+
return self.client_connection._container_properties_cache[self.container_link] # pylint: disable=protected-access, line-too-long
97100

98101
@property
99102
async def is_system_key(self) -> bool:
@@ -167,12 +170,10 @@ async def read(
167170
request_options["populatePartitionKeyRangeStatistics"] = populate_partition_key_range_statistics
168171
if populate_quota_info is not None:
169172
request_options["populateQuotaInfo"] = populate_quota_info
170-
171-
collection_link = self.container_link
172-
self._properties = await self.client_connection.ReadContainer(
173-
collection_link, options=request_options, **kwargs
174-
)
175-
return self._properties
173+
container = await self.client_connection.ReadContainer(self.container_link, options=request_options, **kwargs)
174+
# Only cache Container Properties that will not change in the lifetime of the container
175+
self.client_connection._set_container_properties_cache(self.container_link, _set_properties_cache(container)) # pylint: disable=protected-access, line-too-long
176+
return container
176177

177178
@distributed_trace_async
178179
async def create_item(

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
ProxyPolicy)
4949

5050
from .. import _base as base
51+
from .._base import _set_properties_cache
5152
from .. import documents
5253
from .._routing import routing_range
5354
from ..documents import ConnectionPolicy, DatabaseAccount
@@ -145,7 +146,7 @@ def __init__(
145146

146147
self.connection_policy = connection_policy or ConnectionPolicy()
147148
self.partition_resolvers: Dict[str, RangePartitionResolver] = {}
148-
self.partition_key_definition_cache: Dict[str, Any] = {}
149+
self.__container_properties_cache: Dict[str, Dict[str, Any]] = {}
149150
self.default_headers: Dict[str, Any] = {
150151
http_constants.HttpHeaders.CacheControl: "no-cache",
151152
http_constants.HttpHeaders.Version: http_constants.Versions.CurrentVersion,
@@ -227,6 +228,26 @@ def __init__(
227228
# Routing map provider
228229
self._routing_map_provider: SmartRoutingMapProvider = SmartRoutingMapProvider(self)
229230

231+
@property
232+
def _container_properties_cache(self) -> Dict[str, Dict[str, Any]]:
233+
"""Gets the container properties cache from the client.
234+
:returns: the container properties cache for the client.
235+
:rtype: Dict[str, Dict[str, Any]]"""
236+
return self.__container_properties_cache
237+
238+
def _set_container_properties_cache(self, container_link: str, properties: Optional[Dict[str, Any]]) -> None:
239+
"""Sets the container properties cache for the specified container.
240+
241+
This will only update the properties cache for a specified container.
242+
:param container_link: The container link will be used as the key to cache the container properties.
243+
:type container_link: str
244+
:param properties: These are the container properties to cache.
245+
:type properties: Optional[Dict[str, Any]]"""
246+
if properties:
247+
self.__container_properties_cache[container_link] = properties
248+
else:
249+
self.__container_properties_cache[container_link] = {}
250+
230251
@property
231252
def _Session(self) -> Optional[_session.Session]:
232253
"""Gets the session object from the client.
@@ -3035,13 +3056,14 @@ async def _AddPartitionKey(self, collection_link, document, options):
30353056
# TODO: Refresh the cache if partition is extracted automatically and we get a 400.1001
30363057

30373058
# If the document collection link is present in the cache, then use the cached partitionkey definition
3038-
if collection_link in self.partition_key_definition_cache:
3039-
partitionKeyDefinition = self.partition_key_definition_cache.get(collection_link)
3059+
if collection_link in self.__container_properties_cache:
3060+
cached_container = self.__container_properties_cache.get(collection_link)
3061+
partitionKeyDefinition = cached_container.get("partitionKey")
30403062
# Else read the collection from backend and add it to the cache
30413063
else:
3042-
collection = await self.ReadContainer(collection_link)
3043-
partitionKeyDefinition = collection.get("partitionKey")
3044-
self.partition_key_definition_cache[collection_link] = partitionKeyDefinition
3064+
container = await self.ReadContainer(collection_link)
3065+
partitionKeyDefinition = container.get("partitionKey")
3066+
self.__container_properties_cache[collection_link] = _set_properties_cache(container)
30453067

30463068
# If the collection doesn't have a partition key definition, skip it as it's a legacy collection
30473069
if partitionKeyDefinition:

sdk/cosmos/azure-cosmos/azure/cosmos/container.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
validate_cache_staleness_value,
3737
_deserialize_throughput,
3838
_replace_throughput,
39-
GenerateGuidId
39+
GenerateGuidId,
40+
_set_properties_cache
4041
)
4142
from .exceptions import CosmosResourceNotFoundError
4243
from .http_constants import StatusCodes
@@ -83,17 +84,19 @@ def __init__(
8384
self.id = id
8485
self.container_link = "{}/colls/{}".format(database_link, self.id)
8586
self.client_connection = client_connection
86-
self._properties = properties
8787
self._is_system_key: Optional[bool] = None
8888
self._scripts: Optional[ScriptsProxy] = None
89+
if properties:
90+
self.client_connection._set_container_properties_cache(self.container_link,
91+
_set_properties_cache(properties)) # pylint: disable=protected-access, line-too-long
8992

9093
def __repr__(self) -> str:
9194
return "<ContainerProxy [{}]>".format(self.container_link)[:1024]
9295

9396
def _get_properties(self) -> Dict[str, Any]:
94-
if self._properties is None:
95-
self._properties = self.read()
96-
return self._properties
97+
if self.container_link not in self.client_connection._container_properties_cache: # pylint: disable=protected-access, line-too-long
98+
self.read()
99+
return self.client_connection._container_properties_cache[self.container_link] # pylint: disable=protected-access, line-too-long
97100

98101
@property
99102
def is_system_key(self) -> bool:
@@ -173,11 +176,10 @@ def read( # pylint:disable=docstring-missing-param
173176
request_options["populatePartitionKeyRangeStatistics"] = populate_partition_key_range_statistics
174177
if populate_quota_info is not None:
175178
request_options["populateQuotaInfo"] = populate_quota_info
176-
collection_link = self.container_link
177-
self._properties = self.client_connection.ReadContainer(
178-
collection_link, options=request_options, **kwargs
179-
)
180-
return self._properties
179+
container = self.client_connection.ReadContainer(self.container_link, options=request_options, **kwargs)
180+
# Only cache Container Properties that will not change in the lifetime of the container
181+
self.client_connection._set_container_properties_cache(self.container_link, _set_properties_cache(container)) # pylint: disable=protected-access, line-too-long
182+
return container
181183

182184
@distributed_trace
183185
def read_item( # pylint:disable=docstring-missing-param

sdk/cosmos/azure-cosmos/samples/examples.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,34 @@
142142
print(json.dumps(properties, indent=True))
143143
# [END get_database_properties]
144144

145+
# Retrieve the properties of a container
146+
# [START get_container_properties]
147+
# Get properties will return a cache of two container properties: RID and the Partition Key Definition (This will not consume RUs)
148+
properties = container._get_properties()
149+
150+
# Print _rid and partitionKey
151+
print("Resource ID: ", properties.get('_rid'))
152+
print("Partition Key: ", properties.get('partitionKey'))
153+
154+
# Read the container to get the latests of all the Container Properties. (This will make a backend requests and will consume RUs)
155+
container_properties = container.read()
156+
157+
# Print each property one by one if they are currently in the container properties
158+
print("indexingPolicy: ", container_properties.get("indexingPolicy"))
159+
print("etag: ", container_properties.get('_etag'))
160+
print("lastModified: ", container_properties.get('lastModified'))
161+
print("defaultTtl: ", container_properties.get('defaultTtl'))
162+
print("uniqueKeyPolicy: ", container_properties.get('uniqueKeyPolicy'))
163+
print("conflictResolutionPolicy: ", container_properties.get('conflictResolutionPolicy'))
164+
print("changeFeedPolicy: ", container_properties.get('changeFeedPolicy'))
165+
print("geospatialConfig: ", container_properties.get('geospatialConfig'))
166+
167+
# Print remaining properties if they are in the current container properties
168+
for key, value in container_properties.items():
169+
if key not in ['_rid', 'partitionKey', 'indexingPolicy', '_etag', 'lastModified', 'defaultTtl', 'uniqueKeyPolicy', 'conflictResolutionPolicy', 'changeFeedPolicy', 'geospatialConfig']:
170+
print(f"{key}: {value}")
171+
# [END get_container_properties]
172+
145173
# Modify the properties of an existing container
146174
# This example sets the default time to live (TTL) for items in the
147175
# container to 3600 seconds (1 hour). An item in container is deleted

sdk/cosmos/azure-cosmos/samples/examples_async.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,35 @@ async def examples_async():
147147
print(json.dumps(properties, indent=True))
148148
# [END get_database_properties]
149149

150+
# Retrieve the properties of a container
151+
# [START get_container_properties]
152+
# Get properties will return a cache of two container properties: RID and the Partition Key Definition (This will not consume RUs)
153+
properties = await container._get_properties()
154+
155+
# Print _rid and partitionKey
156+
print("Resource ID: ", properties.get('_rid'))
157+
print("Partition Key: ", properties.get('partitionKey'))
158+
159+
# Read the container to get the latests of all the Container Properties. (This will make a backend requests and will consume RUs)
160+
container_properties = await container.read()
161+
162+
# Print each property one by one if they are currently in the container properties
163+
print("indexingPolicy: ", container_properties.get("indexingPolicy"))
164+
print("etag: ", container_properties.get('_etag'))
165+
print("lastModified: ", container_properties.get('lastModified'))
166+
print("defaultTtl: ", container_properties.get('defaultTtl'))
167+
print("uniqueKeyPolicy: ", container_properties.get('uniqueKeyPolicy'))
168+
print("conflictResolutionPolicy: ", container_properties.get('conflictResolutionPolicy'))
169+
print("changeFeedPolicy: ", container_properties.get('changeFeedPolicy'))
170+
print("geospatialConfig: ", container_properties.get('geospatialConfig'))
171+
172+
# Print remaining properties if they are in the current container properties
173+
for key, value in container_properties.items():
174+
if key not in ['_rid', 'partitionKey', 'indexingPolicy', '_etag', 'lastModified', 'defaultTtl', 'uniqueKeyPolicy',
175+
'conflictResolutionPolicy', 'changeFeedPolicy', 'geospatialConfig']:
176+
print(f"{key}: {value}")
177+
# [END get_container_properties]
178+
150179
# Modify the properties of an existing container
151180
# This example sets the default time to live (TTL) for items in the
152181
# container to 3600 seconds (1 hour). An item in container is deleted

0 commit comments

Comments
 (0)