diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/MalformedResponseTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/MalformedResponseTests.java
new file mode 100644
index 000000000000..d625d4cc4dd8
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/MalformedResponseTests.java
@@ -0,0 +1,118 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.implementation;
+
+import com.azure.cosmos.CosmosAsyncClient;
+import com.azure.cosmos.CosmosAsyncContainer;
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.TestObject;
+import com.azure.cosmos.models.PartitionKey;
+import com.azure.cosmos.rx.TestSuiteBase;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.testng.annotations.AfterSuite;
+import org.testng.annotations.BeforeSuite;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Ignore;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Fail.fail;
+
+@Ignore("MalformedResponseTests is only safe to run in isolation as it leverages Reflection to override the ObjectMapper instance responsible for deserialization.")
+public class MalformedResponseTests extends TestSuiteBase {
+    @Factory(dataProvider = "clientBuildersWithSessionConsistency")
+    public MalformedResponseTests(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    /** Sets the malformed-input decoder action to IGNORE and enables non-parseable document logging, then runs the base suite setup. */
+    @BeforeSuite(groups = {"emulator"})
+    public void beforeSuite() {
+        System.setProperty("COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT", "IGNORE");
+        System.setProperty("COSMOS.IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED", "true");
+        super.beforeSuite();
+    }
+
+    /**
+     * Validates that a {@link CosmosException} with status code {@code BADREQUEST} and sub-status code
+     * {@code FAILED_TO_PARSE_SERVER_RESPONSE} is thrown when the server response cannot be deserialized.
+     *
+     * <p>NOTE: To cover the case without a fallback decoder, run this test with {@link #beforeSuite()} and
+     * {@link #afterSuite()} commented out; leave both enabled to cover the case with a fallback decoder.
+     * @throws NoSuchFieldException if {@code Utils.simpleObjectMapper} cannot be located via reflection.
+     * @throws IllegalAccessException if restoring the original mapper via reflection is denied.
+     */
+    @Test(groups = { "emulator" })
+    public void validateCosmosExceptionThrownOnMalformedResponse() throws NoSuchFieldException, IllegalAccessException {
+        CosmosAsyncClient cosmosAsyncClient = null;
+        ObjectMapper originalMapper = null;
+        try {
+            cosmosAsyncClient = getClientBuilder()
+                .key(TestConfigurations.MASTER_KEY)
+                .endpoint(TestConfigurations.HOST)
+                .buildAsyncClient();
+            CosmosAsyncContainer cosmosAsyncContainer = getSharedSinglePartitionCosmosContainer(cosmosAsyncClient);
+
+            TestObject testObject = TestObject.create();
+            cosmosAsyncContainer.createItem(testObject).block();
+
+            Field field = Utils.class.getDeclaredField("simpleObjectMapper");
+            field.setAccessible(true);
+            // Remember the real mapper, then swap in one whose readTree overloads always throw.
+            originalMapper = (ObjectMapper) field.get(null);
+            field.set(null, new FailingObjectMapper());
+
+            cosmosAsyncContainer.readItem(testObject.getId(), new PartitionKey(testObject.getMypk()), TestObject.class).block();
+            fail("The read operation should have failed");
+        } catch (CosmosException cosmosException) {
+            assertThat(cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.BADREQUEST);
+            assertThat(cosmosException.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE);
+            assertThat(cosmosException.getDiagnostics()).isNotNull();
+            assertThat(cosmosException.getResponseHeaders()).isNotNull();
+            assertThat(cosmosException.getResponseHeaders()).isNotEmpty();
+        } catch (IllegalAccessException e) {
+            fail("An IllegalAccessException shouldn't have occurred", e);
+        } finally {
+            try {
+                // Restore only if the original was captured; writing null would break later users of the shared mapper.
+                // The nested finally closes the client even if the restore itself throws.
+                if (originalMapper != null) {
+                    Field field = Utils.class.getDeclaredField("simpleObjectMapper");
+                    field.setAccessible(true);
+                    field.set(null, originalMapper);
+                    field.setAccessible(false);
+                }
+            } finally {
+                safeClose(cosmosAsyncClient);
+            }
+        }
+    }
+
+    /** ObjectMapper whose byte[] and String readTree overloads always throw, simulating an unparseable payload. */
+    private static class FailingObjectMapper extends ObjectMapper {
+        @Override
+        public JsonNode readTree(byte[] bytes) throws IOException {
+            throw new IOException("Simulated failure");
+        }
+
+        @Override
+        public JsonNode readTree(String content) throws JsonProcessingException {
+            throw new JsonParseException("Simulated failure");
+        }
+    }
+
+    @AfterSuite(groups = {"emulator"})
+    public void afterSuite() {
+        System.clearProperty("COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT");
+        System.clearProperty("COSMOS.IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED");
+        super.afterSuite();
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/epkversion/PartitionProcessorImplTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/epkversion/PartitionProcessorImplTests.java
index c86d37a4a3f7..a5e12735fdca 100644
--- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/epkversion/PartitionProcessorImplTests.java
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/epkversion/PartitionProcessorImplTests.java
@@ -3,7 +3,10 @@
package com.azure.cosmos.implementation.changefeed.epkversion;
+import com.azure.cosmos.BridgeInternal;
import com.azure.cosmos.CosmosAsyncContainer;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.implementation.HttpConstants;
import com.azure.cosmos.implementation.PartitionKeyRangeIsSplittingException;
import com.azure.cosmos.implementation.changefeed.CancellationTokenSource;
import com.azure.cosmos.implementation.changefeed.ChangeFeedContextClient;
@@ -22,6 +25,8 @@
import com.azure.cosmos.implementation.feedranges.FeedRangePartitionKeyRangeImpl;
import com.azure.cosmos.models.ChangeFeedProcessorItem;
import com.azure.cosmos.models.FeedResponse;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonMappingException;
import org.mockito.Mockito;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -29,9 +34,11 @@
import reactor.core.publisher.Mono;
import reactor.test.StepVerifier;
+import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
@@ -221,6 +228,70 @@ public void partitionSplitHappenOnFirstRequest() {
assertThat(feedRangeGoneException.getLastContinuation()).isEqualTo(leaseMock.getContinuationToken());
}
+ @Test(groups = "unit")
+ public void partitionProcessingErrorWhenInternalServerErrorIsHit() {
+
+ CosmosException parsingException = BridgeInternal.createCosmosException(
+ "A parsing error occurred.",
+ new IOException("An error occurred."),
+ new HashMap<>(),
+ HttpConstants.StatusCodes.BADREQUEST,
+ null);
+
+ BridgeInternal.setSubStatusCode(parsingException, HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE);
+
+ ChangeFeedObserver observerMock = Mockito.mock(ChangeFeedObserver.class);
+ ChangeFeedContextClient changeFeedContextClientMock = Mockito.mock(ChangeFeedContextClient.class);
+
+ // Setup initial state with continuation token
+ ChangeFeedStateV1 initialChangeFeedState = this.getChangeFeedStateWithContinuationTokens(1);
+
+ CosmosAsyncContainer containerMock = Mockito.mock(CosmosAsyncContainer.class);
+ ProcessorSettings processorSettings = new ProcessorSettings(initialChangeFeedState, containerMock);
+ processorSettings.withMaxItemCount(10);
+
+ // Setup lease and checkpointer mocks
+ Lease leaseMock = Mockito.mock(ServiceItemLeaseV1.class);
+ Mockito.when(leaseMock.getContinuationToken()).thenReturn(initialChangeFeedState.toString());
+
+ PartitionCheckpointer partitionCheckpointer = Mockito.mock(PartitionCheckpointerImpl.class);
+
+ String lastContinuationToken = initialChangeFeedState.toString();
+
+ // Setup change feed request to throw parsing exception
+ Mockito
+ .when(changeFeedContextClientMock.createDocumentChangeFeedQuery(Mockito.any(), Mockito.any(), Mockito.any()))
+ .thenReturn(Flux.error(parsingException));
+
+ // Checkpointing mock setup
+ final ChangeFeedState continuationState = ChangeFeedState.fromString(lastContinuationToken);
+ Mockito.when(partitionCheckpointer.checkpointPartition(continuationState))
+ .thenReturn(Mono.empty());
+
+ // Create processor
+ PartitionProcessorImpl partitionProcessor = new PartitionProcessorImpl<>(
+ observerMock,
+ changeFeedContextClientMock,
+ processorSettings,
+ partitionCheckpointer,
+ leaseMock,
+ ChangeFeedProcessorItem.class,
+ ChangeFeedMode.INCREMENTAL,
+ null);
+
+ StepVerifier
+ .create(partitionProcessor.run(new CancellationTokenSource().getToken()))
+ .verifyComplete();
+
+ // Verify that the PartitionProcessorImpl completed with the expected parsing exception
+ RuntimeException runtimeException = partitionProcessor.getResultException();
+ assertThat(runtimeException).isNotNull();
+ assertThat(runtimeException.getCause()).isInstanceOf(CosmosException.class);
+ CosmosException cosmosException = (CosmosException) runtimeException.getCause();
+ assertThat(cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.BADREQUEST);
+ assertThat(cosmosException.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE);
+ }
+
private ChangeFeedStateV1 getChangeFeedStateWithContinuationTokens(int tokenCount) {
String containerRid = "/cols/" + UUID.randomUUID();
String pkRangeId = UUID.randomUUID().toString();
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/pkversion/PartitionProcessorImplTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/pkversion/PartitionProcessorImplTests.java
index 0658b000184f..acba6b6fd076 100644
--- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/pkversion/PartitionProcessorImplTests.java
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/changefeed/pkversion/PartitionProcessorImplTests.java
@@ -3,7 +3,10 @@
package com.azure.cosmos.implementation.changefeed.pkversion;
+import com.azure.cosmos.BridgeInternal;
import com.azure.cosmos.CosmosAsyncContainer;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.implementation.HttpConstants;
import com.azure.cosmos.implementation.PartitionKeyRangeIsSplittingException;
import com.azure.cosmos.implementation.changefeed.CancellationTokenSource;
import com.azure.cosmos.implementation.changefeed.ChangeFeedContextClient;
@@ -33,8 +36,10 @@
import java.time.Duration;
import java.time.Instant;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.HashMap;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
@@ -123,69 +128,69 @@ public void shouldCheckpointFor304WhenContinuationTokenChanges(
}
}
- @Test(groups = "unit")
- public void processedTimeSetAfterProcessing() {
- ChangeFeedObserver observerMock = Mockito.mock(ChangeFeedObserver.class);
- Mockito.when(observerMock.processChanges(Mockito.any(), Mockito.anyList())).thenReturn(Mono.empty());
-
+ /** Verifies that a PartitionKeyRangeIsSplittingException from the change feed query surfaces as a FeedRangeGoneException carrying the lease's continuation token. */
+ @Test
+ public void partitionSplitHappenOnFirstRequest() {
+ @SuppressWarnings("unchecked") ChangeFeedObserver observerMock =
+ (ChangeFeedObserver) Mockito.mock(ChangeFeedObserver.class);
 ChangeFeedContextClient changeFeedContextClientMock = Mockito.mock(ChangeFeedContextClient.class);
+ Mockito
+ .when(changeFeedContextClientMock.createDocumentChangeFeedQuery(Mockito.any(), Mockito.any(),
+ Mockito.any()))
+ .thenReturn(Flux.error(new PartitionKeyRangeIsSplittingException()));
+ // State built by the helper with tokenCount = 2; its current continuation token is what the lease returns below.
+ ChangeFeedState changeFeedState = this.getChangeFeedStateWithContinuationTokens(2);
 CosmosAsyncContainer containerMock = Mockito.mock(CosmosAsyncContainer.class);
+ ProcessorSettings processorSettings = new ProcessorSettings(changeFeedState, containerMock);
+ processorSettings.withMaxItemCount(10);
+ // Lease mock reports the state's current continuation token; the final assertion compares against it.
+ Lease leaseMock = Mockito.mock(ServiceItemLease.class);
+ Mockito
+ .when(leaseMock.getContinuationToken())
+ .thenReturn(changeFeedState.getContinuation().getCurrentContinuationToken().getToken());
- ChangeFeedStateV1 startState = getChangeFeedStateWithContinuationTokens(1);
- ProcessorSettings settings = new ProcessorSettings(startState, containerMock);
- settings.withMaxItemCount(10);
-
- Lease leaseMock = Mockito.mock(ServiceItemLeaseV1.class);
- Mockito.when(leaseMock.getContinuationToken()).thenReturn(startState.toString());
-
- PartitionCheckpointer checkpointerMock = Mockito.mock(PartitionCheckpointerImpl.class);
-
- // Create a feed response with one mocked result
- @SuppressWarnings("unchecked") FeedResponse feedResponseMock = Mockito.mock(FeedResponse.class);
- List results = new ArrayList<>();
- results.add(Mockito.mock(ChangeFeedProcessorItem.class));
- AtomicInteger counter = new AtomicInteger(0);
- Mockito.when(feedResponseMock.getResults()).thenAnswer(invocation -> {
- Thread.sleep(500);
- return counter.getAndIncrement() < 10 ? results : new ArrayList<>();
- });
- ChangeFeedState changeFeedState = this.getChangeFeedStateWithContinuationTokens(1);
- Mockito.when(feedResponseMock.getContinuationToken()).thenReturn(changeFeedState.toString());
-
- // The processor will continuously fetch, but we will cancel shortly after first batch
- Mockito.doReturn(Flux.just(feedResponseMock))
- .when(changeFeedContextClientMock)
- .createDocumentChangeFeedQuery(Mockito.any(), Mockito.any(), Mockito.any());
-
- PartitionProcessorImpl processor = new PartitionProcessorImpl(
+ LeaseCheckpointer leaseCheckpointerMock = Mockito.mock(LeaseCheckpointer.class);
+ PartitionCheckpointer partitionCheckpointer = new PartitionCheckpointerImpl(leaseCheckpointerMock, leaseMock);
+ PartitionProcessorImpl partitionProcessor = new PartitionProcessorImpl(
 observerMock,
 changeFeedContextClientMock,
- settings,
- checkpointerMock,
+ processorSettings,
+ partitionCheckpointer,
 leaseMock,
- null);
- Instant initialTime = processor.getLastProcessedTime();
-
- CancellationTokenSource cts = new CancellationTokenSource();
- Mono runMono = processor.run(cts.getToken());
+ null
+ );
- StepVerifier.create(runMono)
- .thenAwait(Duration.ofMillis(800))
- .then(cts::cancel)
- .verifyComplete();
+ StepVerifier.create(partitionProcessor.run(new CancellationTokenSource().getToken()))
+ .verifyComplete();
- assertThat(processor.getLastProcessedTime()).isAfter(initialTime);
+ RuntimeException runtimeException = partitionProcessor.getResultException();
+ assertThat(runtimeException).isNotNull();
+ assertThat(runtimeException).isInstanceOf(FeedRangeGoneException.class);
+ FeedRangeGoneException feedRangeGoneException = (FeedRangeGoneException) runtimeException;
+ assertThat(feedRangeGoneException.getLastContinuation()).isEqualTo(leaseMock.getContinuationToken());
 }
- @Test
- public void partitionSplitHappenOnFirstRequest() {
+ @Test(groups = {"unit"})
+ public void partitionProcessingErrorWhenInternalServerErrorIsHit() {
+
+ CosmosException parsingException = BridgeInternal.createCosmosException(
+ "A parsing error occurred.",
+ new IOException("An error occurred."),
+ new HashMap<>(),
+ HttpConstants.StatusCodes.BADREQUEST,
+ null);
+
+ BridgeInternal.setSubStatusCode(parsingException, HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE);
+
@SuppressWarnings("unchecked") ChangeFeedObserver observerMock =
(ChangeFeedObserver) Mockito.mock(ChangeFeedObserver.class);
ChangeFeedContextClient changeFeedContextClientMock = Mockito.mock(ChangeFeedContextClient.class);
+
+ // Setup change feed request to throw parsing exception
Mockito
.when(changeFeedContextClientMock.createDocumentChangeFeedQuery(Mockito.any(), Mockito.any(),
Mockito.any()))
- .thenReturn(Flux.error(new PartitionKeyRangeIsSplittingException()));
+ .thenReturn(Flux.error(parsingException));
ChangeFeedState changeFeedState = this.getChangeFeedStateWithContinuationTokens(2);
CosmosAsyncContainer containerMock = Mockito.mock(CosmosAsyncContainer.class);
@@ -210,13 +215,15 @@ public void partitionSplitHappenOnFirstRequest() {
);
StepVerifier.create(partitionProcessor.run(new CancellationTokenSource().getToken()))
- .verifyComplete();
+ .verifyComplete();
+ // Verify that the PartitionProcessorImpl completed with the expected parsing exception
RuntimeException runtimeException = partitionProcessor.getResultException();
assertThat(runtimeException).isNotNull();
- assertThat(runtimeException).isInstanceOf(FeedRangeGoneException.class);
- FeedRangeGoneException feedRangeGoneException = (FeedRangeGoneException) runtimeException;
- assertThat(feedRangeGoneException.getLastContinuation()).isEqualTo(leaseMock.getContinuationToken());
+ assertThat(runtimeException.getCause()).isInstanceOf(CosmosException.class);
+ CosmosException cosmosException = (CosmosException) runtimeException.getCause();
+ assertThat(cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.BADREQUEST);
+ assertThat(cosmosException.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE);
}
private ChangeFeedStateV1 getChangeFeedStateWithContinuationTokens(int tokenCount) {
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayloadTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayloadTests.java
index 7d478acbb895..16a15157118d 100644
--- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayloadTests.java
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayloadTests.java
@@ -9,6 +9,8 @@
import org.testng.annotations.Ignore;
import org.testng.annotations.Test;
+import java.util.HashMap;
+
public class JsonNodeStorePayloadTests {
@Test(groups = {"unit"})
@Ignore("fallbackCharsetDecoder will only be initialized during the first time when JsonNodeStorePayload loaded," +
@@ -26,7 +28,7 @@ public void parsingBytesWithInvalidUT8Bytes() {
try {
byte[] bytes = hexStringToByteArray(invalidHexString);
ByteBuf byteBuf = Unpooled.wrappedBuffer(bytes);
- JsonNodeStorePayload jsonNodeStorePayload = new JsonNodeStorePayload(new ByteBufInputStream(byteBuf), bytes.length);
+ JsonNodeStorePayload jsonNodeStorePayload = new JsonNodeStorePayload(new ByteBufInputStream(byteBuf), bytes.length, new HashMap<>());
jsonNodeStorePayload.getPayload().toString();
} finally {
System.clearProperty("COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT");
diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md
index 4bc725af4448..9c3af5dfff77 100644
--- a/sdk/cosmos/azure-cosmos/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md
@@ -19,6 +19,7 @@
* Changed timestamp format to be consistent in leases for CFP. - [PR 46784](https://github.com/Azure/azure-sdk-for-java/pull/46784)
* Added `MetadataThrottlingRetryPolicy` for `PartitionKeyRange` `RequestRateTooLargeException` handling. - [PR 46823](https://github.com/Azure/azure-sdk-for-java/pull/46823)
* Ensure effective `DirectConnectionConfig#setNetworkRequestTimeout` is set to at least 5 seconds. - [PR 47024](https://github.com/Azure/azure-sdk-for-java/pull/47024)
+* Wrap JSON parsing exceptions as `CosmosException` to provide better context. - [PR 47040](https://github.com/Azure/azure-sdk-for-java/pull/47040)
### 4.74.0 (2025-09-05)
diff --git a/sdk/cosmos/azure-cosmos/docs/StatusCodes.md b/sdk/cosmos/azure-cosmos/docs/StatusCodes.md
index 4cc7bb2f5276..c8da7a1690ce 100644
--- a/sdk/cosmos/azure-cosmos/docs/StatusCodes.md
+++ b/sdk/cosmos/azure-cosmos/docs/StatusCodes.md
@@ -14,49 +14,49 @@ This document is intentionally not going into details on how resilient applicati
## Status codes
-| Status code | Substatus code | Expected to be transient | Additional info |
-| -----------------:|------------------------------:|:-----------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-|200| 0 |No| `OK` |
-|201| 0 |No| `Created` - returned for createItem or upsertItem when a new document was created |
-|204| 0 |No| `No Content` - returned when no payload would ever be returned - like for delete operations |
-|207| 0 |No| `Multi-Status` - returned for transactional batch or bulk operations when some of the item operations have failed and others succeeded. The API allows checking status codes of item operations. |
-|304| 0 |No| `Not Modified` - will be returned for `ChangeFeed` operations to indicate that there are no more changes |
-|400| \* |No| `Bad Request` - indicates that the client violated some protocol constraint. See [Bad Request TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-bad-request) for more details. |
-|400| 1001 |No| `Bad Request/Partition key mismatch` - indicates that the PartitionKey defined in the point operation does not match the partition key value being extracted in the service form the document's payload based on the `PartitionKeyDefinition` of the container. See [Bad Request TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-bad-request) for more details. |
-|400| 1004 |No| `Bad Request/CrossPartitionQueryNotServable` - indicates that the client attempted to execute a cross-partition query, which cannot be processed with the current SDK version. Usually this means that the query uses a query construct, which is not yet supported in the SDK version being used. Upgrading the SDK might help to address the problem. See [Bad Request TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-bad-request) for more details. |
-|401| 0 |No| `Unauthorized` - indicates that the client used invalid credentials. The most frequent scenario when this is happening, is when customers rotate the currently used key. Key rotation needs to be replicated across regions, which can take up-to a few minuts. During this time a `401 Unauthroized` would be used when the client is using the old or new key while the replication is still happening. The best way to do key rotation is to rotate the key only after it is not used by applications anymore - that is why a primary and secondary key exists for both writable and read-only keys. More details can be found here - [key rotation best practices](https://learn.microsoft.com/azure/cosmos-db/secure-access-to-data?tabs=using-primary-key#key-rotation). In addition this could also mean an invalid key when using `MasterKey`-based authentication, it could mean there is a time-synchronization issue or when using AAD that the AAD credentials are not correctly set-up. See [Unauthorized TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-unauthorized) for more details. |
-|403| \* |No| `Forbidden` - indicates that the service rejected the request due to missing permissions. See [Forbidden TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-forbidden) |
-|403| 3 |Yes (up to few minutes)| `Forbidden/WriteForbidden` - indicates that the client attempted a write operation against a read-only region in a single write region set-up. |
-|403| 1008 |Yes (up to few minutes)| `Forbidden/AccountNotFound` - indicates that the client attempted a read or write operation against a replica that did not have information about the database account. |
-|403| 5300 |No| `Forbidden/AADForMetadata` - indicates that the client attempted a metadata operation (like creating, deleting or modifying a container/database) when using AAD authentication. This is not possible via the Data plane SDK. To execute control plane operatiosn with AAD authentication, please use the management SDK. See [Forbidden TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-forbidden#partition-key-exceeding-storage) and also the [Azure Cosmos DB Service quotas](https://learn.microsoft.com/azure/cosmos-db/concepts-limits#provisioned-throughput) for more details |
-|403| 1014 |No| `Forbidden/LogicalPartitionExceedsStorage` - indicates that the data size of a logical partition exceeds the service quota (currently 20 GB). See [Forbidden TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-forbidden#non-data-operations-are-not-allowed) for more details |
-|404| 0 |No| `Not found` - Indicates that the resource the client tried to read does not exist (on the replica being contacted). Depending on the consistency level used this could be a transient error condition - but when using less than strong consistency the application needs to be able to handle temporarily seeing 404/0 from some replica even after document got created gracefully. See [Not found TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-not-found) for more details. |
-|404| 1002 |In most cases| `Not Found/Read session no available` - Indicates that a client uses session consistency and reached a replica that has a replication lag and has not caught-up to the requested session token. In many cases this error condition will be transient. But there are certain situation in which it could persist for longer period of times - either a wrong session token is being provided in the application or in a Multi-Write region set-up operations are regulary directed to different regions |
-|404| 1003 |Yes (up to few minutes)| `Not Found/Owner resource does not exist` - Indicates that a client attempted to process an operation on a resource whose parent does not exist. For example an attempt to do a point operation on a document when the container does not exist (yet). Can be transient when attempting document operations immediately after creating a container etc. - but when not transient usually means a bug in your application. |
-|404| 1024 |x| `Not Found/Incorrect Container resource id` - Indicates that a client attempted to use a container that has recently been deleted and recreated. So, the cached container id in the client is stale - and identifies the previosuly deleted container. The SDK will trigger retries - in general applications need to be able to tolerate that container deletion and immediate recreation will take up-to a few seconds/minutes to be replicated across all regions. |
-|408| \* |Yes| `Request timeout` - Indicates a timeout for an attempted operation. See [Request timeout TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-java-sdk-request-timeout) for more details. |
-|408| 20008 |Yes, unless unrealistic e2e timeout is used| `Request timeout/End-to-end timeout exceeded` - Indicates that the application defined end-to-end timeout was exceeded when processing an operation. This will usually be a transient error condition - exceptions are when the application defines unrealistic end-to-end timeouts - for example when executing a query that could very well take a few seconds because it is relatively inefficient or when the end-to-end timeout is lower than the to-be-expected network transit time between the application's location and the Cosmos DB service endpoint. |
-|408| 20901 |No| `Request timeout/Negative End-to-end timeout provided` - Indicates that the application defined a negative end-to-end timeout. This indicates a bug in your application. |
-|409| 0 |No| `Conflict` - Indicates that the attempt to insert (or upsert) a new document cannot be processed because another document with the same identity (partition key value + value of `id` property) exists or a unqiue key constraint would be violated. |
-|410| \* |Yes| `Gone` - indicates transient error conditions that could happen while replica get moved to a different node or partitions get split/merged. The SDK will retry these error conditions and usually mitigate them without even surfacing them to the application. If these errors get surfaced to the application as `CosmosException` with status code `410` or `503` these errors should always be transient. |
-|410| 1000 |x| `Not Found/Incorrect Container resource id` - Indicates that a client attempted to use a container that has recently been deleted and recreated. So, the cached container id in the client is stale - and identifies the previosuly deleted container. The SDK will trigger retries - in general applications need to be able to tolerate that container deletion and immediate recreation will take up-to a few seconds/minutes to be replicated across all regions. |
-|410| 21010 |Yes| `Service timeout` - Indicates that an operation has been timed out at the service. See [Request timeout TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-request-timeout) for more details. This error will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
-|410| 21006 |Yes| `Global strong write barrier not met` - Indicates that synchronous replication of a write operation in a multi-region account with strong consistency did not complete. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
-|410| 21007 |Yes| `Read quorum not met` - Indicates that no read quorum could be achieved when using strong or bounded staleness consistency. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
-|412| 0 |No| `Precondition failed` - The document has been modified since the application read it (and retrieved the etag that was used as pre-codnition for the write operation). This is the typical optimistic concurrency signal - and needs to be gracefully handled in your application. The usual patterns is to re-read the document, apply the same changes and retry the write with the updated etag. See [Precondition failed TSG - trouble-shooting guide](https://aka.ms/CosmosDB/sql/errors/precondition-failed) for more details. |
-|413| \* |No| `Request entity too large` - indicates that the client attempted to create or update a document with a payload that is too large. See [Azure Cosmos DB Service quotas](https://learn.microsoft.com/azure/cosmos-db/concepts-limits#per-item-limits) for more details. |
-|429| 3200 |Depends on app RU/s usage| `User throttling` - Indicates that the operations being processed by your Cosmos DB account exceed the provisioned throughput RU/s. Mitigation can be done by either scaling-up - or improving the efficiency especially of queries to reduce the RU/s consumption. See [Throttling TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-request-rate-too-large) for more details. |
-|429| 3201 |Yes| `Metadata throttling` - Indicates that metadata operations are being throttled. Increasing provisioned throughput (RU/s) won't help - this usually indicates a bug in your application where metadata calls are triggered extensively or you are not using a singleton pattern for `CosmosClient`/`CosmosAsyncClient`. See [Throttling TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-request-rate-too-large) for more details. |
-|429| < 3200 |Yes (up to few minutes)| `SLA violating throttling` - Indicates service-side throttling that will count against the service's SLA. These errors should always be transient. |
-|449| 0 |Yes| `RetryWith` - Indicates a concurrent attempt to change documents server-side - for example via patch or stored procedure invocation. The `449` status code will be automatically retried by the SDK. This condition should always be transient as long as the application is not excessively doing concurrent changes to documents. |
-|500| 0 |Unknown| `Internal Server error` - Error returned from server, Indicates unexpected and unqualified internal service error. |
-|500| 20902 - 20910 ; 20912 - 20913 |Unknown| `Internal Server error` - Client generated 500. The error message will have the details about the cause. |
-|502| 21011 |Unknown| `Bad gateway` - Indicated an HTTP proxy you are using is misbehaving. Any `502` or `504` is a clear signal that the actual problem is not in Cosmos DB but the proxy being used. In general HTTP proxies are not recommended for any production workload. |
-|503| \* |Yes| `Service unavailable` - Indicates that either service issue occurred or the client event after retries is not able to successfully process an operation. See [Service unavailable TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-service-unavailable) |
-|503| 21001 |Yes| `Name cache is stale` - Indicates that a container was deleted and recreated - and the client's cache still has the old container metadata. This error indicates that the client even after refreshing the cache got the container metadata of the "old" container. Usually it indicates that the replication of the new container metadata across all regions took longer than usual. This error should always be transient. |
-|503| 21002 |Yes| `Partition key range gone` - Indicates that a partition split or merge happened and the client even after several retries was not able to get the metadata for the new partition. This error indicates a delay of replication of partition key range metadata and should always be transient. |
-|503| 21003 |Yes| `Completing split` - Indicates that a partition split or merge is pending and commiting the split takes longer than expected. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
-|503| 21004 |Yes| `Completing migration` - Indicates that a partition migration due to load-balancing is pending and takes longer than expected. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
-|410/503| 21005 |Yes| `Serverside 410` - Indicates that a replica returns a 410 - usually during initialization of the replica. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
-|503| 21008 |Yes| `Service unavailable` - Indicates that a replica returned `503` service unavailable. This error should always be transient and will surface as a CosmosException with status code `503` after exceeding SDK-retries. |
-|504| 0 |Unknown| `Gateway timeout` - Indicated an HTTP proxy you are using timed out. Any `502` or `504` is a clear signal that the actual problem is not in Cosmos DB but the proxy being used. In general HTTP proxies are not recommended for any production workload. |
+| Status code | Substatus code | Expected to be transient | Additional info |
+| -----------------:|--------------------------------------:|:-----------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+|200| 0 |No| `OK` |
+|201| 0 |No| `Created` - returned for createItem or upsertItem when a new document was created |
+|204| 0 |No| `No Content` - returned when no payload would ever be returned - like for delete operations |
+|207| 0 |No| `Multi-Status` - returned for transactional batch or bulk operations when some of the item operations have failed and others succeeded. The API allows checking status codes of item operations. |
+|304| 0 |No| `Not Modified` - will be returned for `ChangeFeed` operations to indicate that there are no more changes |
+|400| \* |No| `Bad Request` - indicates that the client violated some protocol constraint. See [Bad Request TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-bad-request) for more details. |
+|400| 1001 |No| `Bad Request/Partition key mismatch` - indicates that the PartitionKey defined in the point operation does not match the partition key value being extracted in the service from the document's payload based on the `PartitionKeyDefinition` of the container. See [Bad Request TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-bad-request) for more details. |
+|400| 1004 |No| `Bad Request/CrossPartitionQueryNotServable` - indicates that the client attempted to execute a cross-partition query, which cannot be processed with the current SDK version. Usually this means that the query uses a query construct, which is not yet supported in the SDK version being used. Upgrading the SDK might help to address the problem. See [Bad Request TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-bad-request) for more details. |
+|401| 0 |No| `Unauthorized` - indicates that the client used invalid credentials. The most frequent scenario when this is happening, is when customers rotate the currently used key. Key rotation needs to be replicated across regions, which can take up-to a few minutes. During this time a `401 Unauthorized` would be used when the client is using the old or new key while the replication is still happening. The best way to do key rotation is to rotate the key only after it is not used by applications anymore - that is why a primary and secondary key exists for both writable and read-only keys. More details can be found here - [key rotation best practices](https://learn.microsoft.com/azure/cosmos-db/secure-access-to-data?tabs=using-primary-key#key-rotation). In addition this could also mean an invalid key when using `MasterKey`-based authentication, it could mean there is a time-synchronization issue or when using AAD that the AAD credentials are not correctly set-up. See [Unauthorized TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-unauthorized) for more details. |
+|403| \* |No| `Forbidden` - indicates that the service rejected the request due to missing permissions. See [Forbidden TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-forbidden) |
+|403| 3 |Yes (up to few minutes)| `Forbidden/WriteForbidden` - indicates that the client attempted a write operation against a read-only region in a single write region set-up. |
+|403| 1008 |Yes (up to few minutes)| `Forbidden/AccountNotFound` - indicates that the client attempted a read or write operation against a replica that did not have information about the database account. |
+|403| 5300 |No| `Forbidden/AADForMetadata` - indicates that the client attempted a metadata operation (like creating, deleting or modifying a container/database) when using AAD authentication. This is not possible via the Data plane SDK. To execute control plane operations with AAD authentication, please use the management SDK. See [Forbidden TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-forbidden#non-data-operations-are-not-allowed) and also the [Azure Cosmos DB Service quotas](https://learn.microsoft.com/azure/cosmos-db/concepts-limits#provisioned-throughput) for more details |
+|403| 1014 |No| `Forbidden/LogicalPartitionExceedsStorage` - indicates that the data size of a logical partition exceeds the service quota (currently 20 GB). See [Forbidden TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-forbidden#partition-key-exceeding-storage) for more details |
+|404| 0 |No| `Not found` - Indicates that the resource the client tried to read does not exist (on the replica being contacted). Depending on the consistency level used this could be a transient error condition - but when using less than strong consistency the application needs to be able to handle temporarily seeing 404/0 from some replica even after document got created gracefully. See [Not found TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-not-found) for more details. |
+|404| 1002 |In most cases| `Not Found/Read session not available` - Indicates that a client uses session consistency and reached a replica that has a replication lag and has not caught-up to the requested session token. In many cases this error condition will be transient. But there are certain situations in which it could persist for longer periods of time - either a wrong session token is being provided in the application or in a Multi-Write region set-up operations are regularly directed to different regions. |
+|404| 1003 |Yes (up to few minutes)| `Not Found/Owner resource does not exist` - Indicates that a client attempted to process an operation on a resource whose parent does not exist. For example an attempt to do a point operation on a document when the container does not exist (yet). Can be transient when attempting document operations immediately after creating a container etc. - but when not transient usually means a bug in your application. |
+|404| 1024 |x| `Not Found/Incorrect Container resource id` - Indicates that a client attempted to use a container that has recently been deleted and recreated. So, the cached container id in the client is stale - and identifies the previously deleted container. The SDK will trigger retries - in general applications need to be able to tolerate that container deletion and immediate recreation will take up-to a few seconds/minutes to be replicated across all regions. |
+|408| \* |Yes| `Request timeout` - Indicates a timeout for an attempted operation. See [Request timeout TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-java-sdk-request-timeout) for more details. |
+|408| 20008 |Yes, unless unrealistic e2e timeout is used| `Request timeout/End-to-end timeout exceeded` - Indicates that the application defined end-to-end timeout was exceeded when processing an operation. This will usually be a transient error condition - exceptions are when the application defines unrealistic end-to-end timeouts - for example when executing a query that could very well take a few seconds because it is relatively inefficient or when the end-to-end timeout is lower than the to-be-expected network transit time between the application's location and the Cosmos DB service endpoint. |
+|408| 20901 |No| `Request timeout/Negative End-to-end timeout provided` - Indicates that the application defined a negative end-to-end timeout. This indicates a bug in your application. |
+|409| 0 |No| `Conflict` - Indicates that the attempt to insert (or upsert) a new document cannot be processed because another document with the same identity (partition key value + value of `id` property) exists or a unique key constraint would be violated. |
+|410| \* |Yes| `Gone` - indicates transient error conditions that could happen while replica get moved to a different node or partitions get split/merged. The SDK will retry these error conditions and usually mitigate them without even surfacing them to the application. If these errors get surfaced to the application as `CosmosException` with status code `410` or `503` these errors should always be transient. |
+|410| 1000 |x| `Not Found/Incorrect Container resource id` - Indicates that a client attempted to use a container that has recently been deleted and recreated. So, the cached container id in the client is stale - and identifies the previously deleted container. The SDK will trigger retries - in general applications need to be able to tolerate that container deletion and immediate recreation will take up-to a few seconds/minutes to be replicated across all regions. |
+|410| 21010 |Yes| `Service timeout` - Indicates that an operation has been timed out at the service. See [Request timeout TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-request-timeout) for more details. This error will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
+|410| 21006 |Yes| `Global strong write barrier not met` - Indicates that synchronous replication of a write operation in a multi-region account with strong consistency did not complete. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
+|410| 21007 |Yes| `Read quorum not met` - Indicates that no read quorum could be achieved when using strong or bounded staleness consistency. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
+|412| 0 |No| `Precondition failed` - The document has been modified since the application read it (and retrieved the etag that was used as pre-condition for the write operation). This is the typical optimistic concurrency signal - and needs to be gracefully handled in your application. The usual pattern is to re-read the document, apply the same changes and retry the write with the updated etag. See [Precondition failed TSG - trouble-shooting guide](https://aka.ms/CosmosDB/sql/errors/precondition-failed) for more details. |
+|413| \* |No| `Request entity too large` - indicates that the client attempted to create or update a document with a payload that is too large. See [Azure Cosmos DB Service quotas](https://learn.microsoft.com/azure/cosmos-db/concepts-limits#per-item-limits) for more details. |
+|429| 3200 |Depends on app RU/s usage| `User throttling` - Indicates that the operations being processed by your Cosmos DB account exceed the provisioned throughput RU/s. Mitigation can be done by either scaling-up - or improving the efficiency especially of queries to reduce the RU/s consumption. See [Throttling TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-request-rate-too-large) for more details. |
+|429| 3201 |Yes| `Metadata throttling` - Indicates that metadata operations are being throttled. Increasing provisioned throughput (RU/s) won't help - this usually indicates a bug in your application where metadata calls are triggered extensively or you are not using a singleton pattern for `CosmosClient`/`CosmosAsyncClient`. See [Throttling TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-request-rate-too-large) for more details. |
+|429| < 3200 |Yes (up to few minutes)| `SLA violating throttling` - Indicates service-side throttling that will count against the service's SLA. These errors should always be transient. |
+|449| 0 |Yes| `RetryWith` - Indicates a concurrent attempt to change documents server-side - for example via patch or stored procedure invocation. The `449` status code will be automatically retried by the SDK. This condition should always be transient as long as the application is not excessively doing concurrent changes to documents. |
+|500| 0 |Unknown| `Internal Server error` - Error returned from server, Indicates unexpected and unqualified internal service error. |
+|500| 20902 - 20910 ; 20912 - 20913 ; 21011 |Unknown| `Internal Server error` - Client generated 500. The error message will have the details about the cause. |
+|502| 21011 |Unknown| `Bad gateway` - Indicates that an HTTP proxy you are using is misbehaving. Any `502` or `504` is a clear signal that the actual problem is not in Cosmos DB but the proxy being used. In general HTTP proxies are not recommended for any production workload. |
+|503| \* |Yes| `Service unavailable` - Indicates that either service issue occurred or the client even after retries is not able to successfully process an operation. See [Service unavailable TSG - trouble-shooting guide](https://learn.microsoft.com/azure/cosmos-db/nosql/troubleshoot-service-unavailable) |
+|503| 21001 |Yes| `Name cache is stale` - Indicates that a container was deleted and recreated - and the client's cache still has the old container metadata. This error indicates that the client even after refreshing the cache got the container metadata of the "old" container. Usually it indicates that the replication of the new container metadata across all regions took longer than usual. This error should always be transient. |
+|503| 21002 |Yes| `Partition key range gone` - Indicates that a partition split or merge happened and the client even after several retries was not able to get the metadata for the new partition. This error indicates a delay of replication of partition key range metadata and should always be transient. |
+|503| 21003 |Yes| `Completing split` - Indicates that a partition split or merge is pending and committing the split takes longer than expected. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
+|503| 21004 |Yes| `Completing migration` - Indicates that a partition migration due to load-balancing is pending and takes longer than expected. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
+|410/503| 21005 |Yes| `Serverside 410` - Indicates that a replica returns a 410 - usually during initialization of the replica. This error should always be transient and will be mapped to a CosmosException with status code `503` when surfacing it to the application after exceeding SDK-retries. |
+|503| 21008 |Yes| `Service unavailable` - Indicates that a replica returned `503` service unavailable. This error should always be transient and will surface as a CosmosException with status code `503` after exceeding SDK-retries. |
+|504| 0 |Unknown| `Gateway timeout` - Indicates that an HTTP proxy you are using timed out. Any `502` or `504` is a clear signal that the actual problem is not in Cosmos DB but the proxy being used. In general HTTP proxies are not recommended for any production workload. |
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java
index 2bfa1246c75c..dd7b0d6e6d43 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java
@@ -360,6 +360,10 @@ public class Configs {
private static final String HTTP2_MAX_CONCURRENT_STREAMS = "COSMOS.HTTP2_MAX_CONCURRENT_STREAMS";
private static final String HTTP2_MAX_CONCURRENT_STREAMS_VARIABLE = "COSMOS_HTTP2_MAX_CONCURRENT_STREAMS";
+ private static final boolean DEFAULT_IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED = false;
+ private static final String IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED = "COSMOS.IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED";
+ private static final String IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED_VARIABLE = "COSMOS_IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED";
+
public static final String APPLICATIONINSIGHTS_CONNECTION_STRING = "applicationinsights.connection.string";
public static final String APPLICATIONINSIGHTS_CONNECTION_STRING_VARIABLE = "APPLICATIONINSIGHTS_CONNECTION_STRING";
@@ -1240,4 +1244,14 @@ public static EnumSet getDefaultOtelSpanAttributeNamingSc
return AttributeNamingScheme.parse(DEFAULT_OTEL_SPAN_ATTRIBUTE_NAMING_SCHEME);
}
+
+ public static boolean isNonParseableDocumentLoggingEnabled() {
+ String isNonParseableDocumentLoggingEnabledAsString = System.getProperty(
+ IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED,
+ firstNonNull(
+ emptyToNull(System.getenv().get(IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED_VARIABLE)),
+ String.valueOf(DEFAULT_IS_NON_PARSEABLE_DOCUMENT_LOGGING_ENABLED)));
+
+ return Boolean.parseBoolean(isNonParseableDocumentLoggingEnabledAsString);
+ }
}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java
index 1f7a42c678d1..63c238f0e6ab 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java
@@ -479,6 +479,7 @@ public static class SubStatusCodes {
public static final int SERVER_GENERATED_503 = 21008;
public static final int NO_VALID_STORE_RESPONSE = 21009;
public static final int SERVER_GENERATED_408 = 21010;
+ public static final int FAILED_TO_PARSE_SERVER_RESPONSE = 21011;
}
public static class HeaderValues {
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java
index 2ee089418852..cec240dc11bc 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java
@@ -475,18 +475,19 @@ private Mono toDocumentServiceResponse(Mono toDocumentServiceResponse(Mono responseHeaders) {
+
+ // TODO: Review adding resource address
+ CosmosException exceptionToThrow = BridgeInternal.createCosmosException(
+ nestedException.getMessage(),
+ nestedException,
+ responseHeaders,
+ statusCode,
+ Strings.Emtpy);
+
+ BridgeInternal.setSubStatusCode(exceptionToThrow, substatusCode);
+
+ return exceptionToThrow;
+ }
}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java
index afbae881cf6a..bbf642ba667a 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java
@@ -4,6 +4,7 @@
package com.azure.cosmos.implementation.directconnectivity;
import com.azure.cosmos.implementation.Configs;
+import com.azure.cosmos.implementation.HttpConstants;
import com.azure.cosmos.implementation.Utils;
import com.fasterxml.jackson.databind.JsonNode;
import io.netty.buffer.ByteBufInputStream;
@@ -16,6 +17,8 @@
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import java.util.Map;
public class JsonNodeStorePayload implements StorePayload {
private static final Logger logger = LoggerFactory.getLogger(JsonNodeStorePayload.class);
@@ -23,17 +26,17 @@ public class JsonNodeStorePayload implements StorePayload {
private final int responsePayloadSize;
private final JsonNode jsonValue;
- public JsonNodeStorePayload(ByteBufInputStream bufferStream, int readableBytes) {
+ public JsonNodeStorePayload(ByteBufInputStream bufferStream, int readableBytes, Map responseHeaders) {
if (readableBytes > 0) {
this.responsePayloadSize = readableBytes;
- this.jsonValue = fromJson(bufferStream, readableBytes);
+ this.jsonValue = fromJson(bufferStream, readableBytes, responseHeaders);
} else {
this.responsePayloadSize = 0;
this.jsonValue = null;
}
}
- private static JsonNode fromJson(ByteBufInputStream bufferStream, int readableBytes) {
+ private static JsonNode fromJson(ByteBufInputStream bufferStream, int readableBytes, Map responseHeaders) {
byte[] bytes = new byte[readableBytes];
try {
bufferStream.read(bytes);
@@ -41,25 +44,56 @@ private static JsonNode fromJson(ByteBufInputStream bufferStream, int readableBy
} catch (IOException e) {
if (fallbackCharsetDecoder != null) {
logger.warn("Unable to parse JSON, fallback to use customized charset decoder.", e);
- return fromJsonWithFallbackCharsetDecoder(bytes);
+ return fromJsonWithFallbackCharsetDecoder(bytes, responseHeaders);
} else {
- throw new IllegalStateException("Unable to parse JSON.", e);
+
+ String baseErrorMessage = "Failed to parse JSON document. No fallback charset decoder configured.";
+
+ if (Configs.isNonParseableDocumentLoggingEnabled()) {
+ String documentSample = Base64.getEncoder().encodeToString(bytes);
+ logger.error(baseErrorMessage + " " + "Document in Base64 format: [" + documentSample + "]", e);
+ } else {
+ logger.error(baseErrorMessage);
+ }
+
+ IllegalStateException innerException = new IllegalStateException("Unable to parse JSON.", e);
+
+ throw Utils.createCosmosException(
+ HttpConstants.StatusCodes.BADREQUEST,
+ HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE,
+ innerException,
+ responseHeaders);
}
}
}
- private static JsonNode fromJsonWithFallbackCharsetDecoder(byte[] bytes) {
+ private static JsonNode fromJsonWithFallbackCharsetDecoder(byte[] bytes, Map responseHeaders) {
try {
String sanitizedJson = fallbackCharsetDecoder.decode(ByteBuffer.wrap(bytes)).toString();
return Utils.getSimpleObjectMapper().readTree(sanitizedJson);
} catch (IOException e) {
- logger.warn("Unable to parse JSON, fallback failed.", e);
- throw new IllegalStateException(
+
+ String baseErrorMessage = "Failed to parse JSON document even after applying fallback charset decoder.";
+
+ if (Configs.isNonParseableDocumentLoggingEnabled()) {
+ String documentSample = Base64.getEncoder().encodeToString(bytes);
+ logger.error(baseErrorMessage + " " + "Document in Base64 format: [" + documentSample + "]", e);
+ } else {
+ logger.error(baseErrorMessage);
+ }
+
+ Exception nestedException = new IllegalStateException(
String.format(
"Unable to parse JSON with fallback charset decoder[OnMalformedInput %s, OnUnmappedCharacter %s]",
Configs.getCharsetDecoderErrorActionOnMalformedInput(),
Configs.getCharsetDecoderErrorActionOnUnmappedCharacter()),
e);
+
+ throw Utils.createCosmosException(
+ HttpConstants.StatusCodes.BADREQUEST,
+ HttpConstants.SubStatusCodes.FAILED_TO_PARSE_SERVER_RESPONSE,
+ nestedException,
+ responseHeaders);
}
}
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java
index 158207d9b569..c8539f09cba5 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java
@@ -70,7 +70,7 @@ public StoreResponse(
replicaStatusList = new HashMap<>();
if (contentStream != null) {
try {
- this.responsePayload = new JsonNodeStorePayload(contentStream, responsePayloadLength);
+ this.responsePayload = new JsonNodeStorePayload(contentStream, responsePayloadLength, headerMap);
}
finally {
try {