Skip to content

Commit cc5055b

Browse files
authored
Additional cosmos diagnostics logging support (#41012)
* Update _base.py * update diagnostics to capture all cosmos exceptions This addition to cosmos logging will capture exceptions even if they get retried and succeed; it will also capture service request and service response errors. * update cosmos diagnostics * Update diagnostics These changes use on_exception to capture logs for errors that were not raised with a response. This includes 500s, service request errors, and service response errors. This also fixes issues with hierarchical loggers and makes sure we check all filters to see if we should add the log attributes to all requests and responses. * update typos and check result[1] * update diagnostics logs * small updates * update tests and mypy and pylint fixes * Update _cosmos_http_logging_policy.py * update pylint and test fixes * Update CHANGELOG.md
1 parent 2dcb122 commit cc5055b

9 files changed

+539
-74
lines changed

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
* Fixed session container session token logic. The SDK will now only send the relevant partition-local session tokens for read document requests and write requests when multi-region writes are enabled, as opposed to the entire compound session token for the container for every document request. See [PR 41678](https://github.com/Azure/azure-sdk-for-python/pull/41678).
1212
* Write requests for single-write region accounts will no longer send session tokens when using session consistency. See [PR 41678](https://github.com/Azure/azure-sdk-for-python/pull/41678).
1313
* Fixed bug where container cache was not being properly updated resulting in unnecessary extra requests. See [PR 42143](https://github.com/Azure/azure-sdk-for-python/pull/42143).
14+
* Fixed bug where the filters on parent loggers or handlers of Cosmos diagnostics loggers were not being applied. See [PR 41012](https://github.com/Azure/azure-sdk-for-python/pull/41012).
1415

1516
#### Other Changes
1617
* Changed to include client id in headers for all requests. See [PR 42104](https://github.com/Azure/azure-sdk-for-python/pull/42104).
1718
* Added an option to override AAD audience scope through environment variable. See [PR 42228](https://github.com/Azure/azure-sdk-for-python/pull/42228).
19+
* Diagnostics logging will now log ServiceRequestError exceptions, ServiceResponseError exceptions, and Cosmos exceptions with a status code of 500 or greater. See [PR 41012](https://github.com/Azure/azure-sdk-for-python/pull/41012).
1820

1921
### 4.14.0b1 (2025-07-14)
2022

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def __init__( # pylint: disable=too-many-statements
207207
else:
208208
scope = base.create_scope_from_url(self.url_connection)
209209
credentials_policy = CosmosBearerTokenCredentialPolicy(self.aad_credentials, scope)
210-
210+
self._enable_diagnostics_logging = kwargs.pop("enable_diagnostics_logging", False)
211211
policies = [
212212
HeadersPolicy(**kwargs),
213213
ProxyPolicy(proxies=proxies),
@@ -220,7 +220,7 @@ def __init__( # pylint: disable=too-many-statements
220220
DistributedTracingPolicy(**kwargs),
221221
CosmosHttpLoggingPolicy(
222222
logger=kwargs.pop("logger", None),
223-
enable_diagnostics_logging=kwargs.pop("enable_diagnostics_logging", False),
223+
enable_diagnostics_logging=self._enable_diagnostics_logging,
224224
global_endpoint_manager=self._global_endpoint_manager,
225225
**kwargs
226226
),

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_http_logging_policy.py

Lines changed: 285 additions & 66 deletions
Large diffs are not rendered by default.

sdk/cosmos/azure-cosmos/azure/cosmos/_retry_utility.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"""
2424
import json
2525
import time
26+
import logging
2627
from typing import Optional
2728

2829
from azure.core.exceptions import AzureError, ClientAuthenticationError, ServiceRequestError, ServiceResponseError
@@ -41,6 +42,7 @@
4142
from .documents import _OperationType
4243
from .exceptions import CosmosHttpResponseError
4344
from .http_constants import HttpHeaders, StatusCodes, SubStatusCodes, ResourceType
45+
from ._cosmos_http_logging_policy import _log_diagnostics_error
4446

4547

4648
# pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
@@ -94,6 +96,8 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
9496
service_request_retry_policy = _service_request_retry_policy.ServiceRequestRetryPolicy(
9597
client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args,
9698
)
99+
# Get logger
100+
logger = kwargs.get("logger", logging.getLogger("azure.cosmos._retry_utility"))
97101
# HttpRequest we would need to modify for Container Recreate Retry Policy
98102
request = None
99103
if args and len(args) > 3:
@@ -136,10 +140,22 @@ def Execute(client, global_endpoint_manager, function, *args, **kwargs): # pylin
136140
response = exceptions.InternalException(status_code=StatusCodes.NOT_FOUND,
137141
headers={HttpHeaders.SubStatus:
138142
SubStatusCodes.THROUGHPUT_OFFER_NOT_FOUND})
139-
raise exceptions.CosmosResourceNotFoundError(
143+
e_offer = exceptions.CosmosResourceNotFoundError(
140144
status_code=StatusCodes.NOT_FOUND,
141145
message="Could not find ThroughputProperties for container " + link,
142146
response=response)
147+
148+
response_headers = result[1] if len(result) > 1 else {}
149+
logger_attributes = {
150+
"duration": time.time() - start_time,
151+
"verb": request.method,
152+
"status_code": e_offer.status_code,
153+
"sub_status_code": e_offer.sub_status,
154+
}
155+
_log_diagnostics_error(client._enable_diagnostics_logging, request, response_headers, e_offer,
156+
logger_attributes, global_endpoint_manager, logger=logger)
157+
raise e_offer
158+
143159
return result
144160
except exceptions.CosmosHttpResponseError as e:
145161
if request:

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def __init__( # pylint: disable=too-many-statements
217217
else:
218218
scope = base.create_scope_from_url(self.url_connection)
219219
credentials_policy = AsyncCosmosBearerTokenCredentialPolicy(self.aad_credentials, scope)
220-
220+
self._enable_diagnostics_logging = kwargs.pop("enable_diagnostics_logging", False)
221221
policies = [
222222
HeadersPolicy(**kwargs),
223223
ProxyPolicy(proxies=proxies),
@@ -230,7 +230,7 @@ def __init__( # pylint: disable=too-many-statements
230230
DistributedTracingPolicy(**kwargs),
231231
CosmosHttpLoggingPolicy(
232232
logger=kwargs.pop("logger", None),
233-
enable_diagnostics_logging=kwargs.pop("enable_diagnostics_logging", False),
233+
enable_diagnostics_logging=self._enable_diagnostics_logging,
234234
global_endpoint_manager=self._global_endpoint_manager,
235235
**kwargs
236236
),

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_retry_utility_async.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import asyncio # pylint: disable=do-not-import-asyncio
2525
import json
2626
import time
27+
import logging
2728

2829
from azure.core.exceptions import AzureError, ClientAuthenticationError, ServiceRequestError, ServiceResponseError
2930
from azure.core.pipeline.policies import AsyncRetryPolicy
@@ -42,6 +43,7 @@
4243
_has_database_account_header)
4344
from ..exceptions import CosmosHttpResponseError
4445
from ..http_constants import HttpHeaders, StatusCodes, SubStatusCodes
46+
from .._cosmos_http_logging_policy import _log_diagnostics_error
4547

4648

4749
# pylint: disable=protected-access, disable=too-many-lines, disable=too-many-statements, disable=too-many-branches
@@ -93,6 +95,8 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
9395
service_request_retry_policy = _service_request_retry_policy.ServiceRequestRetryPolicy(
9496
client.connection_policy, global_endpoint_manager, pk_range_wrapper, *args,
9597
)
98+
# Get Logger
99+
logger = kwargs.get("logger", logging.getLogger("azure.cosmos._retry_utility_async"))
96100
# HttpRequest we would need to modify for Container Recreate Retry Policy
97101
request = None
98102
if args and len(args) > 3:
@@ -135,10 +139,22 @@ async def ExecuteAsync(client, global_endpoint_manager, function, *args, **kwarg
135139
response = exceptions.InternalException(status_code=StatusCodes.NOT_FOUND,
136140
headers={HttpHeaders.SubStatus:
137141
SubStatusCodes.THROUGHPUT_OFFER_NOT_FOUND})
138-
raise exceptions.CosmosResourceNotFoundError(
142+
e_offer = exceptions.CosmosResourceNotFoundError(
139143
status_code=StatusCodes.NOT_FOUND,
140144
message="Could not find ThroughputProperties for container " + link,
141145
response=response)
146+
147+
response_headers = result[1] if len(result) > 1 else {}
148+
logger_attributes = {
149+
"duration": time.time() - start_time,
150+
"verb": request.method,
151+
"status_code": e_offer.status_code,
152+
"sub_status_code": e_offer.sub_status,
153+
}
154+
_log_diagnostics_error(client._enable_diagnostics_logging, request, response_headers, e_offer,
155+
logger_attributes, global_endpoint_manager, logger=logger)
156+
raise e_offer
157+
142158
return result
143159
except exceptions.CosmosHttpResponseError as e:
144160
if request:

sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,24 @@ class ResourceType:
467467
Topology = "topology"
468468
DatabaseAccount = "databaseaccount"
469469
PartitionKey = "partitionkey"
470+
471+
@staticmethod
472+
def IsCollectionChild(resourceType: str) -> bool:
473+
return resourceType in (
474+
ResourceType.Document,
475+
ResourceType.Attachment,
476+
ResourceType.Conflict,
477+
ResourceType.Schema,
478+
ResourceType.UserDefinedFunction,
479+
ResourceType.Trigger,
480+
ResourceType.StoredProcedure,
481+
ResourceType.PartitionKey,
482+
)
483+
484+
# The list of headers we do not want to log; it must be updated whenever a new header should not be logged.
485+
_cosmos_disallow_list = ["Authorization", "ProxyAuthorization", "TransferEncoding"]
486+
_cosmos_allow_list = set(
487+
v.lower()
488+
for k, v in HttpHeaders.__dict__.items()
489+
if not k.startswith("_") and k not in _cosmos_disallow_list
490+
)

sdk/cosmos/azure-cosmos/tests/test_cosmos_http_logging_policy.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import azure.cosmos.cosmos_client as cosmos_client
1313
from azure.cosmos import PartitionKey, ContainerProxy
14+
from azure.cosmos.exceptions import CosmosHttpResponseError
1415
import test_config
1516
from _fault_injection_transport import FaultInjectionTransport
1617
from test_fault_injection_transport import TestFaultInjectionTransport
@@ -101,6 +102,25 @@ def setUpClass(cls):
101102
logger=cls.logger_filtered_diagnostic,
102103
enable_diagnostics_logging=True)
103104

105+
# Create a root logger with a mock handler and a filter for status codes above 400
106+
cls.root_mock_handler = test_config.MockHandler()
107+
cls.root_mock_handler.addFilter(FilterStatusCode())
108+
cls.root_logger = create_logger("rootLogger", cls.root_mock_handler)
109+
110+
# Create child loggers
111+
cls.root_logger_child = logging.getLogger("rootLogger.child")
112+
cls.root_logger_grandchild = logging.getLogger("rootLogger.child.grandchild")
113+
114+
# Use the grandchild logger for the Cosmos client
115+
cls.client_grandchild_logger = cosmos_client.CosmosClient(
116+
cls.host,
117+
cls.masterKey,
118+
consistency_level="Session",
119+
connection_policy=cls.connectionPolicy,
120+
logger=cls.root_logger_grandchild,
121+
enable_diagnostics_logging=True
122+
)
123+
104124
def test_default_http_logging_policy(self):
105125
# Test if we can log info from creating a database
106126
database_id = "database_test-" + str(uuid.uuid4())
@@ -327,6 +347,78 @@ def test_activity_id_logging_policy(self):
327347
self.client_activity_id.delete_database(database_id)
328348
self.mock_handler_activity_id.reset()
329349

350+
def test_logging_exceptions_with_no_response(self):
351+
# Create a mock handler and logger for capturing logs
352+
mock_handler = test_config.MockHandler()
353+
logger = create_logger("test_logger_fault_injection", mock_handler)
354+
355+
# Set up FaultInjectionTransport to inject a 502 error
356+
id_value = str(uuid.uuid4())
357+
document_definition = {'id': id_value,
358+
'pk': id_value,
359+
'name': 'sample document',
360+
'key': 'value'}
361+
custom_transport = FaultInjectionTransport()
362+
predicate: Callable[[HttpRequest], bool] = lambda r: FaultInjectionTransport.predicate_req_for_document_with_id(
363+
r, id_value)
364+
custom_transport.add_fault(predicate, lambda r: FaultInjectionTransport.error_after_delay(
365+
1000,
366+
CosmosHttpResponseError(
367+
status_code=502,
368+
message="Some random reverse proxy error.")))
369+
370+
371+
# Initialize the client with the custom transport and logger
372+
initialized_objects = TestFaultInjectionTransport.setup_method_with_custom_transport(
373+
custom_transport,
374+
default_endpoint=CONFIG.host,
375+
key=CONFIG.masterKey,
376+
database_id=CONFIG.TEST_DATABASE_ID,
377+
container_id=CONFIG.TEST_SINGLE_PARTITION_CONTAINER_ID,
378+
preferred_locations=[L1, L2],
379+
excluded_locations=[],
380+
multiple_write_locations=True,
381+
custom_logger=logger
382+
)
383+
mock_handler.reset()
384+
385+
# Attempt to create an item, which should trigger the injected 502 error
386+
container: ContainerProxy = initialized_objects["col"]
387+
try:
388+
container.create_item(body=document_definition)
389+
pytest.fail("Expected exception not thrown")
390+
except CosmosHttpResponseError as cosmosError:
391+
# Verify that the logger captured the 502 error and was called from on_exception
392+
assert any(m.status_code == 502 and "on_exception" in m.funcName for m in mock_handler.messages)
393+
394+
def test_hierarchical_logger_with_filter(self):
395+
# Reset the mock handler before the test
396+
self.root_mock_handler.reset()
397+
398+
# Attempt to read a nonexistent item
399+
database_id = "database_test_hierarchical_logger_" + str(uuid.uuid4())
400+
container_id = "container_test_hierarchical_logger_" + str(uuid.uuid4())
401+
database = self.client_grandchild_logger.create_database(id=database_id)
402+
container = database.create_container(id=container_id, partition_key=PartitionKey(path="/pk"))
403+
404+
try:
405+
container.read_item(item="nonexistent_item", partition_key="nonexistent_pk")
406+
except:
407+
pass
408+
409+
# Verify that the error was logged by the root logger's mock handler
410+
assert len(self.root_mock_handler.messages) == 2
411+
log_record = self.root_mock_handler.messages[0]
412+
assert hasattr(log_record, "status_code")
413+
assert log_record.status_code == 404
414+
assert log_record.name == "rootLogger.child.grandchild"
415+
assert not bool(self.root_logger_grandchild.filters)
416+
assert not bool(self.root_logger_child.filters)
417+
assert bool(self.root_mock_handler.filters)
418+
419+
# Clean up
420+
self.client_grandchild_logger.delete_database(database_id)
421+
self.root_mock_handler.reset()
330422

331423
if __name__ == "__main__":
332-
unittest.main()
424+
unittest.main()

0 commit comments

Comments
 (0)