Skip to content

Commit 6817570

Browse files
xinlian12annie-macsimorenohCopilot
authored
removeTypeRequirementFromAvailabilityStrategyConfig (#44190)
* remove type requirment from availability_strategy_config --------- Co-authored-by: annie-mac <[email protected]> Co-authored-by: Simon Moreno <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent a6d25a6 commit 6817570

File tree

6 files changed

+111
-39
lines changed

6 files changed

+111
-39
lines changed

sdk/cosmos/azure-cosmos/README.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,84 @@ Per partition automatic failover enables the SDK to automatically redirect write
945945
only for single write region accounts that have at least one read-only region. When per partition automatic failover is enabled, per partition circuit breaker and cross-region hedging is enabled by default, meaning
946946
all its configurable options also apply to per partition automatic failover. To enable this feature, follow the guide [here](https://learn.microsoft.com/azure/cosmos-db/how-to-configure-per-partition-automatic-failover).
947947

948+
### Cross Region Hedging Availability Strategy
949+
950+
Cross region hedging availability strategy improves availability and reduces latency by sending duplicate requests to secondary regions if the primary region is slow or unavailable. The SDK uses the first successful response, helping to mitigate regional outages or high latency.
951+
952+
#### Key Concepts
953+
954+
- **Hedged Requests**: The SDK sends a parallel request to another region if the primary region does not respond within a configured delay.
955+
- **Configurable**: Hedging can be enabled or disabled, and the delay before sending a hedged request is tunable.
956+
- **ThreadPoolExecutor**: The sync CosmosClient instance will use a ThreadPoolExecutor under the hood for parallelizing requests. Users can choose whether to use the default ThreadPoolExecutor the SDK uses, or to pass in their own instance. *The async client does not need the executor since it uses asynchronous logic to parallelize requests.*
957+
958+
#### Enabling Cross Region Hedging
959+
960+
You can enable cross region hedging by passing the `availability_strategy_config` parameter as a dictionary to the `CosmosClient` or per-request. The most common configuration keys are `threshold_ms` (delay before sending a hedged request) and `threshold_steps_ms` (step interval for additional hedged requests).
961+
962+
#### Client-level configuration
963+
964+
```python
965+
from azure.cosmos import CosmosClient
966+
967+
client = CosmosClient(
968+
"<account-uri>",
969+
"<account-key>",
970+
availability_strategy_config={"threshold_ms": 150, "threshold_steps_ms": 50}
971+
)
972+
```
973+
974+
#### Request-level configuration
975+
976+
```python
977+
# Override or provide the strategy per request
978+
container.read_item(
979+
item="item_id",
980+
partition_key="pk_value",
981+
availability_strategy_config={"threshold_ms": 150, "threshold_steps_ms": 50}
982+
)
983+
```
984+
985+
#### Disable availability strategy on request level
986+
987+
```python
988+
# Disable cross region hedging for a specific request, even if enabled at client level
989+
container.read_item(
990+
item="item_id",
991+
partition_key="pk_value",
992+
availability_strategy_config=None
993+
)
994+
```
995+
996+
#### Customized executor for hedging for sync client
997+
998+
```python
999+
# Pass in your own custom TheadPoolExecutor to use with the sync client
1000+
from concurrent.futures import ThreadPoolExecutor
1001+
from azure.cosmos import CosmosClient
1002+
1003+
executor = ThreadPoolExecutor(max_workers=2)
1004+
client = CosmosClient(
1005+
"<account-uri>",
1006+
"<account-key>",
1007+
availability_strategy_config={"threshold_ms": 150, "threshold_steps_ms": 50},
1008+
availability_strategy_executor=executor
1009+
)
1010+
```
1011+
1012+
#### Customized max concurrency for hedging for async client
1013+
1014+
```python
1015+
# Customize the max concurrency on the default ThreadPoolExecutor for the sync client
1016+
from azure.cosmos import CosmosClient
1017+
1018+
client = CosmosClient(
1019+
"<account-uri>",
1020+
"<account-key>",
1021+
availability_strategy_config={"threshold_ms": 150, "threshold_steps_ms": 50},
1022+
availability_strategy_max_concurrency=2
1023+
)
1024+
```
1025+
9481026
## Troubleshooting
9491027

9501028
### General

sdk/cosmos/azure-cosmos/azure/cosmos/_availability_strategy_config.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ class CrossRegionHedgingStrategyConfig:
3434
The config dictionary can contain:
3535
- threshold_ms: Time in ms before routing to alternate region (default: 500)
3636
- threshold_steps_ms: Time interval between routing attempts (default: 100)
37-
- type: Must be "CrossRegionHedging" if provided
3837
"""
3938
def __init__(self, config: Optional[dict[str, Any]] = None) -> None:
4039
if config is None:
@@ -60,9 +59,4 @@ def _validate_hedging_config(config: Optional[dict[str, Any]]) -> Optional[Cross
6059
if config is None:
6160
return None
6261

63-
if "type" not in config:
64-
raise ValueError("Missing type field in availability strategy config")
65-
if config["type"] != "CrossRegionHedging":
66-
raise ValueError("Wrong availability strategy type is provided")
67-
6862
return CrossRegionHedgingStrategyConfig(config)

sdk/cosmos/azure-cosmos/samples/examples.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@
363363

364364
# configure availability strategy config on request level
365365
# [START read_item_with_availability_strategy_config]
366-
strategy = {'type': 'CrossRegionHedging', 'threshold_ms':500, 'threshold_steps_ms':100}
366+
strategy = {'threshold_ms':500, 'threshold_steps_ms':100}
367367
container.read_item(
368368
item="id1",
369369
partition_key="pk1",

sdk/cosmos/azure-cosmos/samples/examples_async.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ async def examples_async():
367367

368368
# configure availability strategy config on request level
369369
# [START read_item_with_availability_strategy_config]
370-
strategy = strategy = {'type': 'CrossRegionHedging', 'threshold_ms':500, 'threshold_steps_ms':100}
370+
strategy = {'threshold_ms':500, 'threshold_steps_ms':100}
371371
await container.read_item(
372372
item="id1",
373373
partition_key="pk1",

sdk/cosmos/azure-cosmos/tests/test_availability_strategy.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -342,14 +342,14 @@ def _clean_up_container(self, database_id: str, container_id: str):
342342
def test_invalid_thresholds(self, threshold_ms, threshold_steps_ms, error_message):
343343
"""Test that creating strategy with non-positive thresholds raises ValueError when enabled"""
344344
with pytest.raises(ValueError, match=error_message):
345-
config = {'type':'CrossRegionHedging', 'threshold_ms':threshold_ms, 'threshold_steps_ms':threshold_steps_ms}
345+
config = {'threshold_ms':threshold_ms, 'threshold_steps_ms':threshold_steps_ms}
346346
_validate_hedging_config(config)
347347

348348
@pytest.mark.parametrize("operation", [READ, QUERY, QUERY_PK, READ_ALL, CHANGE_FEED, CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH])
349349
@pytest.mark.parametrize("client_availability_strategy, request_availability_strategy", [
350-
(None, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}),
351-
({'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
352-
({'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50})
350+
(None, {'threshold_ms':150, 'threshold_steps_ms':50}),
351+
({'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
352+
({'threshold_ms':150, 'threshold_steps_ms':50}, {'threshold_ms':150, 'threshold_steps_ms':50})
353353
])
354354
def test_availability_strategy_in_steady_state(
355355
self,
@@ -388,9 +388,9 @@ def test_availability_strategy_in_steady_state(
388388

389389
@pytest.mark.parametrize("operation", [READ, QUERY, QUERY_PK, READ_ALL, CHANGE_FEED, CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH])
390390
@pytest.mark.parametrize("client_availability_strategy, request_availability_strategy", [
391-
(None, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}),
392-
({'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
393-
({'type':'CrossRegionHedging', 'threshold_ms':700, 'threshold_steps_ms':50}, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50})
391+
(None, {'threshold_ms':150, 'threshold_steps_ms':50}),
392+
({'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
393+
({'threshold_ms':700, 'threshold_steps_ms':50}, {'threshold_ms':150, 'threshold_steps_ms':50})
394394
])
395395
def test_client_availability_strategy_failover(
396396
self,
@@ -470,7 +470,7 @@ def test_non_transient_errors_from_failed_over_region(self, operation, status_co
470470
)
471471
custom_transport.add_fault(predicate_first_region, error_lambda_first_region)
472472

473-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
473+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
474474
setup = self._setup_method_with_custom_transport(
475475
custom_transport,
476476
multiple_write_locations=True)
@@ -520,7 +520,7 @@ def test_transient_error_from_failed_over_region(self, operation):
520520
)
521521
custom_transport.add_fault(predicate_first_region, error_lambda_first_region)
522522

523-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
523+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
524524
setup = self._setup_method_with_custom_transport(
525525
custom_transport,
526526
multiple_write_locations=True)
@@ -546,7 +546,7 @@ def test_transient_error_from_failed_over_region(self, operation):
546546
def test_request_level_disable_override_client_strategy(self, operation):
547547
"""Test that request-level disabled policy overrides client-level enabled policy"""
548548
# Setup client with enabled hedging policy
549-
client_strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
549+
client_strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
550550

551551
uri_down = _location_cache.LocationCache.GetLocationalEndpoint(self.host, self.REGION_1)
552552
failed_over_uri = _location_cache.LocationCache.GetLocationalEndpoint(self.host, self.REGION_2)
@@ -604,7 +604,7 @@ def test_request_level_enable_override_client_disable(self, operation):
604604
setup_without_fault['col'].create_item(body=doc)
605605

606606
# Create request-level policy to enable hedging
607-
request_strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
607+
request_strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
608608

609609
expected_uris = [uri_down, failed_over_uri]
610610
# Test operation with fault injection
@@ -644,7 +644,7 @@ def test_no_cross_region_request_with_exclude_regions(self, operation):
644644
excluded_uris = [failed_over_uri]
645645

646646
# Test should fail with error from the first region
647-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
647+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
648648
with pytest.raises(CosmosHttpResponseError) as exc_info:
649649
if operation in [READ, QUERY, QUERY_PK, READ_ALL, CHANGE_FEED]:
650650
_perform_read_operation(
@@ -696,7 +696,7 @@ def test_no_cross_region_request_with_retry_write_disabled(self, operation):
696696
expected_uris = [uri_down]
697697
excluded_uris = [failed_over_uri]
698698

699-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
699+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
700700

701701
# Test should fail with error from the first region
702702
with pytest.raises(CosmosHttpResponseError) as exc_info:
@@ -732,7 +732,7 @@ def test_per_partition_circular_breaker_with_cancelled_first_future(self, operat
732732

733733
custom_transport = self._get_custom_transport_with_fault_injection(predicate, error_lambda)
734734

735-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
735+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
736736

737737
setup_with_fault_injection = self._setup_method_with_custom_transport(
738738
custom_transport,
@@ -815,7 +815,7 @@ def test_custom_thread_pool_executor(self):
815815

816816
custom_transport = self._get_custom_transport_with_fault_injection(predicate, error_lambda)
817817

818-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
818+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
819819
setup_with_transport = self._setup_method_with_custom_transport(
820820
custom_transport,
821821
availability_strategy_executor=ThreadPoolExecutor(max_workers=1))

sdk/cosmos/azure-cosmos/tests/test_availability_strategy_async.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -359,17 +359,17 @@ async def _clean_up_container(self, client: CosmosClient, database_id:str, conta
359359
def test_invalid_thresholds(self, threshold_ms, threshold_steps_ms, error_message):
360360
"""Test that creating strategy with non-positive thresholds raises ValueError when enabled"""
361361
with pytest.raises(ValueError, match=error_message):
362-
config = {'type': 'CrossRegionHedging', 'threshold_ms': threshold_ms,
362+
config = {'threshold_ms': threshold_ms,
363363
'threshold_steps_ms': threshold_steps_ms}
364364
_validate_hedging_config(config)
365365

366366
@pytest.mark.asyncio
367367
@pytest.mark.parametrize("operation", [READ, QUERY, QUERY_PK, READ_ALL, CHANGE_FEED, CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH])
368368
@pytest.mark.parametrize("client_availability_strategy, request_availability_strategy", [
369-
(None, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}),
370-
({'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
371-
({'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50},
372-
{'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50})
369+
(None, {'threshold_ms':150, 'threshold_steps_ms':50}),
370+
({'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
371+
({'threshold_ms':150, 'threshold_steps_ms':50},
372+
{'threshold_ms':150, 'threshold_steps_ms':50})
373373
])
374374
async def test_availability_strategy_in_steady_state(
375375
self,
@@ -415,9 +415,9 @@ async def test_availability_strategy_in_steady_state(
415415
@pytest.mark.asyncio
416416
@pytest.mark.parametrize("operation",[READ, QUERY, QUERY_PK, READ_ALL, CHANGE_FEED, CREATE, UPSERT, REPLACE, DELETE, PATCH, BATCH])
417417
@pytest.mark.parametrize("client_availability_strategy, request_availability_strategy", [
418-
(None, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}),
419-
({'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
420-
({'type':'CrossRegionHedging', 'threshold_ms':700, 'threshold_steps_ms':50}, {'type':'CrossRegionHedging', 'threshold_ms':150, 'threshold_steps_ms':50})
418+
(None, {'threshold_ms':150, 'threshold_steps_ms':50}),
419+
({'threshold_ms':150, 'threshold_steps_ms':50}, _Unset),
420+
({'threshold_ms':700, 'threshold_steps_ms':50}, {'threshold_ms':150, 'threshold_steps_ms':50})
421421
])
422422
async def test_client_availability_strategy_failover(
423423
self,
@@ -521,7 +521,7 @@ async def test_non_transient_errors_from_failed_over_region(self, operation, sta
521521
expected_uris = [uri_down, failed_over_uri]
522522

523523
# Test should fail with original error without failover
524-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
524+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
525525
with pytest.raises(CosmosHttpResponseError) as exc_info:
526526
if operation in [READ, QUERY, QUERY_PK, READ_ALL, CHANGE_FEED]:
527527
await _perform_read_operation(
@@ -577,7 +577,7 @@ async def test_transient_error_from_failed_over_region(self, operation, setup):
577577
)
578578
custom_transport.add_fault(predicate_first_region, error_lambda_first_region)
579579

580-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
580+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
581581
setup_with_transport = await self._setup_method_with_custom_transport(
582582
setup['write_locations'],
583583
setup['read_locations'],
@@ -625,7 +625,7 @@ async def test_transient_error_from_failed_over_region(self, operation, setup):
625625
async def test_request_level_disabled_override_client_policy(self, operation, setup):
626626
"""Test that request-level disabled policy overrides client-level enabled policy"""
627627
# Setup client with enabled hedging policy
628-
client_strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
628+
client_strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
629629

630630
uri_down = _location_cache.LocationCache.GetLocationalEndpoint(self.host, setup['region_1'])
631631
failed_over_uri = _location_cache.LocationCache.GetLocationalEndpoint(self.host, setup['region_2'])
@@ -701,7 +701,7 @@ async def test_request_level_enabled_override_client_disabled(self, operation, s
701701
await setup_without_fault['col'].create_item(doc)
702702

703703
# Create request-level enabled policy
704-
request_strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
704+
request_strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
705705

706706
expected_uris = [uri_down, failed_over_uri]
707707
# Test operation with fault injection
@@ -748,7 +748,7 @@ async def test_no_cross_region_request_with_retry_write_disabled(self, operation
748748

749749
expected_uris = [uri_down]
750750
excluded_uris = [failed_over_uri]
751-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
751+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
752752

753753
# Test should fail with error from the first region
754754
with pytest.raises(CosmosHttpResponseError) as exc_info:
@@ -801,7 +801,7 @@ async def test_no_cross_region_request_with_exclude_regions(self, operation, set
801801
expected_uris = [uri_down]
802802
excluded_uris = [failed_over_uri]
803803

804-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
804+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
805805

806806
# Test should fail with error from the first region
807807
with pytest.raises(CosmosHttpResponseError) as exc_info:
@@ -858,7 +858,7 @@ async def test_per_partition_circular_breaker_with_cancelled_first_future(self,
858858

859859
custom_transport = self._get_custom_transport_with_fault_injection(predicate, error_lambda)
860860

861-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
861+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
862862

863863
setup_with_fault_injection = await self._setup_method_with_custom_transport(
864864
setup['write_locations'],
@@ -949,7 +949,7 @@ async def test_max_concurrency(self, setup):
949949

950950
custom_transport = self._get_custom_transport_with_fault_injection(predicate, error_lambda)
951951

952-
strategy = {'type':'CrossRegionHedging', 'threshold_ms':100, 'threshold_steps_ms':50}
952+
strategy = {'threshold_ms':100, 'threshold_steps_ms':50}
953953
setup_with_transport = await self._setup_method_with_custom_transport(
954954
setup['write_locations'],
955955
setup['read_locations'],

0 commit comments

Comments
 (0)