Skip to content

Commit 8a42e73

Browse files
authored
[Identity] Add custom IMDS retry policy (#42330)
This custom policy increases the retry backoff factor when IMDS responds with an HTTP 410 status code, because it can take up to 70 seconds for IMDS to become available. Signed-off-by: Paul Van Eck <[email protected]>
1 parent 18e74ec commit 8a42e73

File tree

6 files changed

+175
-6
lines changed

6 files changed

+175
-6
lines changed

sdk/identity/azure-identity/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
### Other Changes
1414

15+
- `ManagedIdentityCredential` now retries IMDS responses with a 410 status code for a total duration of at least 70 seconds, as required by the [Azure IMDS documentation](https://learn.microsoft.com/azure/virtual-machines/instance-metadata-service?tabs=windows#errors-and-debugging). ([#42330](https://github.com/Azure/azure-sdk-for-python/pull/42330))
16+
1517
## 1.24.0b1 (2025-07-17)
1618

1719
### Features Added

sdk/identity/azure-identity/azure/identity/_credentials/imds.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
import json
77
from typing import Any, Optional, Dict
88

9+
from azure.core.pipeline import PipelineResponse
910
from azure.core.exceptions import ClientAuthenticationError, HttpResponseError
1011
from azure.core.pipeline.transport import HttpRequest
1112
from azure.core.credentials import AccessTokenInfo
13+
from azure.core.pipeline.policies import RetryPolicy
1214

1315
from .. import CredentialUnavailableError
1416
from .._constants import EnvironmentVariables
@@ -31,6 +33,28 @@
3133
}
3234

3335

36+
class ImdsRetryPolicy(RetryPolicy):
37+
"""Custom retry policy for IMDS credential with extended retry duration for 410 responses.
38+
39+
This policy ensures that specifically for 410 status codes, the total exponential backoff duration
40+
is at least 70 seconds to handle temporary IMDS endpoint unavailability.
41+
For other status codes, it uses the standard retry behavior.
42+
"""
43+
44+
def __init__(self, **kwargs: Any) -> None:
45+
# Increased backoff factor to ensure at least 70 seconds retry duration for 410 responses.
46+
# Five retries, with each retry sleeping for [0.0s, 5.0s, 10.0s, 20.0s, 40.0s] between attempts (75s total)
47+
self.backoff_factor_for_410 = 2.5
48+
super().__init__(**kwargs)
49+
50+
def is_retry(self, settings: Dict[str, Any], response: PipelineResponse[Any, Any]) -> bool:
51+
if response.http_response.status_code == 410:
52+
settings["backoff"] = self.backoff_factor_for_410
53+
else:
54+
settings["backoff"] = self.backoff_factor
55+
return super().is_retry(settings, response)
56+
57+
3458
def _get_request(scope: str, identity_config: Dict) -> HttpRequest:
3559
url = (
3660
os.environ.get(EnvironmentVariables.AZURE_POD_IDENTITY_AUTHORITY_HOST, IMDS_AUTHORITY).strip("/")
@@ -58,7 +82,7 @@ def _check_forbidden_response(ex: HttpResponseError) -> None:
5882

5983
class ImdsCredential(MsalManagedIdentityClient):
6084
def __init__(self, **kwargs: Any) -> None:
61-
super(ImdsCredential, self).__init__(**kwargs)
85+
super().__init__(retry_policy_class=ImdsRetryPolicy, **dict(PIPELINE_SETTINGS, **kwargs))
6286
self._config = kwargs
6387

6488
if EnvironmentVariables.AZURE_POD_IDENTITY_AUTHORITY_HOST in os.environ:

sdk/identity/azure-identity/azure/identity/_internal/pipeline.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ def _get_policies(config, _per_retry_policies=None, **kwargs):
6363
def build_pipeline(transport=None, policies=None, **kwargs):
6464
if not policies:
6565
config = _get_config(**kwargs)
66-
config.retry_policy = RetryPolicy(**kwargs)
66+
retry_policy_class = kwargs.pop("retry_policy_class", None)
67+
config.retry_policy = retry_policy_class(**kwargs) if retry_policy_class else RetryPolicy(**kwargs)
6768
policies = _get_policies(config, **kwargs)
6869
if not transport:
6970
from azure.core.pipeline.transport import ( # pylint: disable=non-abstract-transport-import, no-name-in-module
@@ -82,7 +83,8 @@ def build_async_pipeline(transport=None, policies=None, **kwargs):
8283
from azure.core.pipeline.policies import AsyncRetryPolicy
8384

8485
config = _get_config(**kwargs)
85-
config.retry_policy = AsyncRetryPolicy(**kwargs)
86+
retry_policy_class = kwargs.pop("retry_policy_class", None)
87+
config.retry_policy = retry_policy_class(**kwargs) if retry_policy_class else AsyncRetryPolicy(**kwargs)
8688
policies = _get_policies(config, **kwargs)
8789
if not transport:
8890
from azure.core.pipeline.transport import ( # pylint: disable=non-abstract-transport-import, no-name-in-module

sdk/identity/azure-identity/azure/identity/aio/_credentials/imds.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
# Licensed under the MIT License.
44
# ------------------------------------
55
import os
6-
from typing import Optional, Any
6+
from typing import Optional, Any, Dict
77

88
from azure.core.exceptions import ClientAuthenticationError, HttpResponseError
99
from azure.core.credentials import AccessTokenInfo
10+
from azure.core.pipeline.policies import AsyncRetryPolicy
11+
from azure.core.pipeline import PipelineResponse
12+
1013
from ... import CredentialUnavailableError
1114
from ..._constants import EnvironmentVariables
1215
from .._internal import AsyncContextManager
@@ -16,10 +19,33 @@
1619
from ..._credentials.imds import _get_request, _check_forbidden_response, PIPELINE_SETTINGS
1720

1821

22+
class AsyncImdsRetryPolicy(AsyncRetryPolicy):
23+
"""Async custom retry policy for IMDS credential with extended retry duration for 410 responses.
24+
25+
This policy ensures that specifically for 410 status codes, the total exponential backoff duration
26+
is at least 70 seconds to handle temporary IMDS endpoint unavailability.
27+
For other status codes, it uses the standard retry behavior.
28+
"""
29+
30+
def __init__(self, **kwargs: Any) -> None:
31+
# Increased backoff factor to ensure at least 70 seconds retry duration for 410 responses.
32+
# Five retries, with each retry sleeping for [0.0s, 5.0s, 10.0s, 20.0s, 40.0s] between attempts (75s total)
33+
self.backoff_factor_for_410 = 2.5
34+
super().__init__(**kwargs)
35+
36+
def is_retry(self, settings: Dict[str, Any], response: PipelineResponse[Any, Any]) -> bool:
37+
if response.http_response.status_code == 410:
38+
settings["backoff"] = self.backoff_factor_for_410
39+
else:
40+
settings["backoff"] = self.backoff_factor
41+
return super().is_retry(settings, response)
42+
43+
1944
class ImdsCredential(AsyncContextManager, GetTokenMixin):
2045
def __init__(self, **kwargs: Any) -> None:
2146
super().__init__()
2247

48+
kwargs["retry_policy_class"] = AsyncImdsRetryPolicy
2349
self._client = AsyncManagedIdentityClient(_get_request, **dict(PIPELINE_SETTINGS, **kwargs))
2450
if EnvironmentVariables.AZURE_POD_IDENTITY_AUTHORITY_HOST in os.environ:
2551
self._endpoint_available: Optional[bool] = True

sdk/identity/azure-identity/tests/test_imds_credential.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,20 @@
44
# ------------------------------------
55
from itertools import product
66
import time
7+
from unittest.mock import Mock
78

89
from azure.identity import CredentialUnavailableError
9-
from azure.identity._credentials.imds import IMDS_TOKEN_PATH, ImdsCredential, IMDS_AUTHORITY
10+
from azure.identity._credentials.imds import (
11+
IMDS_TOKEN_PATH,
12+
ImdsCredential,
13+
ImdsRetryPolicy,
14+
IMDS_AUTHORITY,
15+
PIPELINE_SETTINGS,
16+
)
1017
from azure.identity._internal.utils import within_credential_chain
18+
from azure.core.pipeline import PipelineResponse
19+
from azure.core.pipeline.policies import RetryPolicy
20+
from azure.core.pipeline.transport import HttpRequest, HttpResponse
1121
import pytest
1222

1323
from helpers import mock, mock_response, Request, validating_transport, GET_TOKEN_METHODS
@@ -135,3 +145,53 @@ def test_managed_identity_aci_probe(self, get_token_method):
135145
token = getattr(credential, get_token_method)(scope)
136146
assert token.token == expected_token
137147
within_credential_chain.set(False)
148+
149+
def test_imds_credential_uses_custom_retry_policy(self):
150+
credential = ImdsCredential()
151+
policies = credential._client._pipeline._impl_policies
152+
assert any(isinstance(policy, ImdsRetryPolicy) for policy in policies)
153+
# Only one retry policy should be present
154+
assert sum(isinstance(policy, RetryPolicy) for policy in policies) == 1
155+
156+
def test_imds_retry_policy(self):
157+
retry_policy = ImdsRetryPolicy(**PIPELINE_SETTINGS)
158+
159+
# Create a shared mock HttpRequest
160+
request = Mock(spec=HttpRequest, body=None, files=None)
161+
request.method = "GET"
162+
163+
# Helper to create HttpResponse and PipelineResponse mocks
164+
def make_pipeline_response(status_code):
165+
response = Mock(spec=HttpResponse, status_code=status_code, http_request=request)
166+
response.headers = {}
167+
pipeline_response = Mock(spec=PipelineResponse, http_request=request, http_response=response)
168+
return pipeline_response
169+
170+
pipeline_response_410 = make_pipeline_response(410)
171+
pipeline_response_404 = make_pipeline_response(404)
172+
173+
# Simulate 5 retries for 410 response
174+
settings_410 = retry_policy.configure_retries({})
175+
total_time_410 = 0
176+
for _ in range(5):
177+
if retry_policy.is_retry(settings_410, pipeline_response_410):
178+
retry_policy.increment(settings_410, response=pipeline_response_410, error=None)
179+
backoff_time = retry_policy.get_backoff_time(settings_410)
180+
total_time_410 += backoff_time
181+
182+
assert (
183+
total_time_410 >= 70
184+
), f"Total retry time for 410 responses should be at least 70 seconds, got {total_time_410:.2f} seconds"
185+
186+
# Simulate 5 retries for 404 response
187+
settings_404 = retry_policy.configure_retries({})
188+
total_time_404 = 0
189+
for _ in range(5):
190+
if retry_policy.is_retry(settings_404, pipeline_response_404):
191+
retry_policy.increment(settings_404, response=pipeline_response_404, error=None)
192+
backoff_time = retry_policy.get_backoff_time(settings_404)
193+
total_time_404 += backoff_time
194+
195+
assert (
196+
total_time_404 < 30
197+
), f"Total retry time for 404 responses should use standard backoff, got {total_time_404:.2f} seconds"

sdk/identity/azure-identity/tests/test_imds_credential_async.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@
66
import json
77
import time
88
from unittest import mock
9+
from unittest.mock import Mock
910

1011
from azure.core.exceptions import ClientAuthenticationError
1112
from azure.identity import CredentialUnavailableError
1213
from azure.identity._constants import EnvironmentVariables
1314
from azure.identity._credentials.imds import IMDS_AUTHORITY, IMDS_TOKEN_PATH
1415
from azure.identity._internal.user_agent import USER_AGENT
15-
from azure.identity.aio._credentials.imds import ImdsCredential, PIPELINE_SETTINGS
16+
from azure.identity.aio._credentials.imds import ImdsCredential, AsyncImdsRetryPolicy
17+
from azure.identity._credentials.imds import PIPELINE_SETTINGS
18+
from azure.core.pipeline import PipelineResponse
19+
from azure.core.pipeline.policies import AsyncRetryPolicy
20+
from azure.core.pipeline.transport import HttpRequest, HttpResponse
1621
from azure.identity._internal.utils import within_credential_chain
1722
import pytest
1823

@@ -346,3 +351,53 @@ async def test_managed_identity_aci_probe(self, get_token_method):
346351
token = await getattr(credential, get_token_method)(scope)
347352
assert token.token == expected_token
348353
within_credential_chain.set(False)
354+
355+
async def test_imds_credential_uses_custom_retry_policy(self):
356+
credential = ImdsCredential()
357+
policies = credential._client._pipeline._impl_policies # type: ignore
358+
assert any(isinstance(policy, AsyncImdsRetryPolicy) for policy in policies)
359+
# Only one retry policy should be present
360+
assert sum(isinstance(policy, AsyncRetryPolicy) for policy in policies) == 1
361+
362+
def test_imds_retry_policy(self):
363+
retry_policy = AsyncImdsRetryPolicy(**PIPELINE_SETTINGS)
364+
365+
# Create a shared mock HttpRequest
366+
request = Mock(spec=HttpRequest, body=None, files=None)
367+
request.method = "GET"
368+
369+
# Helper to create HttpResponse and PipelineResponse mocks
370+
def make_pipeline_response(status_code):
371+
response = Mock(spec=HttpResponse, status_code=status_code, http_request=request)
372+
response.headers = {}
373+
pipeline_response = Mock(spec=PipelineResponse, http_request=request, http_response=response)
374+
return pipeline_response
375+
376+
pipeline_response_410 = make_pipeline_response(410)
377+
pipeline_response_404 = make_pipeline_response(404)
378+
379+
# Simulate 5 retries for 410 response
380+
settings_410 = retry_policy.configure_retries({})
381+
total_time_410 = 0
382+
for _ in range(5):
383+
if retry_policy.is_retry(settings_410, pipeline_response_410):
384+
retry_policy.increment(settings_410, response=pipeline_response_410, error=None)
385+
backoff_time = retry_policy.get_backoff_time(settings_410)
386+
total_time_410 += backoff_time
387+
388+
assert (
389+
total_time_410 >= 70
390+
), f"Total retry time for 410 responses should be at least 70 seconds, got {total_time_410:.2f} seconds"
391+
392+
# Simulate 5 retries for 404 response
393+
settings_404 = retry_policy.configure_retries({})
394+
total_time_404 = 0
395+
for _ in range(5):
396+
if retry_policy.is_retry(settings_404, pipeline_response_404):
397+
retry_policy.increment(settings_404, response=pipeline_response_404, error=None)
398+
backoff_time = retry_policy.get_backoff_time(settings_404)
399+
total_time_404 += backoff_time
400+
401+
assert (
402+
total_time_404 < 30
403+
), f"Total retry time for 404 responses should use standard backoff, got {total_time_404:.2f} seconds"

0 commit comments

Comments
 (0)