Skip to content

Commit 07c08c9

Browse files
authored
[Test Proxy] Centrally sanitize sensitive patterns for all tests (#35196)
* Add sanitizer enum and batch sanitizing method
* Add shared, fake credentials
* Add central set of sanitizers
* Remove KV custom matcher
* Add extra batch of sanitizers
1 parent 5654320 commit 07c08c9

File tree

5 files changed

+235
-19
lines changed

5 files changed

+235
-19
lines changed

sdk/keyvault/azure-keyvault-secrets/tests/test_samples_secrets_async.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,6 @@ async def test_example_secret_crud_operations(self, client, **kwargs):
109109
@AsyncSecretsClientPreparer()
110110
@recorded_by_proxy_async
111111
async def test_example_secret_list_operations(self, client, **kwargs):
112-
if not is_live():
113-
set_custom_default_matcher(excluded_headers="Authorization")
114112
secret_client = client
115113
async with secret_client:
116114
for i in range(7):

tools/azure-sdk-tools/devtools_testutils/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .proxy_testcase import recorded_by_proxy
2424
from .sanitizers import (
2525
add_api_version_transform,
26+
add_batch_sanitizers,
2627
add_body_key_sanitizer,
2728
add_body_regex_sanitizer,
2829
add_body_string_sanitizer,
@@ -40,6 +41,7 @@
4041
add_uri_string_sanitizer,
4142
add_uri_subscription_id_sanitizer,
4243
PemCertificate,
44+
Sanitizer,
4345
set_bodiless_matcher,
4446
set_custom_default_matcher,
4547
set_default_function_settings,
@@ -56,6 +58,7 @@
5658

5759
__all__ = [
5860
"add_api_version_transform",
61+
"add_batch_sanitizers",
5962
"add_body_key_sanitizer",
6063
"add_body_regex_sanitizer",
6164
"add_body_string_sanitizer",
@@ -79,6 +82,7 @@
7982
"FakeResource",
8083
"ReservedResourceNameError",
8184
"ResourceGroupPreparer",
85+
"Sanitizer",
8286
"StorageAccountPreparer",
8387
"BlobAccountPreparer",
8488
"CachedStorageAccountPreparer",

tools/azure-sdk-tools/devtools_testutils/fake_credentials.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,17 @@
22

33

44
# General-use fake credentials
5+
FAKE_ACCESS_TOKEN = "eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJlbWFpbCI6IkJvYkBjb250b3NvLmNvbSIsImdpdmVuX25hbWUiOiJCb2I" \
6+
"iLCJpc3MiOiJodHRwOi8vRGVmYXVsdC5Jc3N1ZXIuY29tIiwiYXVkIjoiaHR0cDovL0RlZmF1bHQuQXVkaWVuY2UuY29tIiwiaWF0IjoiMTYwNz" \
7+
"k3ODY4MyIsIm5iZiI6IjE2MDc5Nzg2ODMiLCJleHAiOiIxNjA3OTc4OTgzIn0."
8+
FAKE_ID = "00000000-0000-0000-0000-000000000000"
59
FAKE_LOGIN_PASSWORD = "F4ke_L0gin_P4ss"
610

711
# Service-specific fake credentials
812
BATCH_TEST_PASSWORD = "kt#_gahr!@aGERDXA"
913
MGMT_HDINSIGHT_FAKE_KEY = "qFmud5LfxcCxWUvWcGMhKDp0v0KuBRLsO/AIddX734W7lzdInsVMsB5ILVoOrF+0fCfk/IYYy5SJ9Q+2v4aihQ=="
14+
SERVICEBUS_FAKE_SAS = "SharedAccessSignature sr=https%3A%2F%2Ffoo.servicebus.windows.net&sig=dummyValue%3D&se=168726" \
15+
"7490&skn=dummyKey"
1016
STORAGE_ACCOUNT_FAKE_KEY = "NzhL3hKZbJBuJ2484dPTR+xF30kYaWSSCbs2BzLgVVI1woqeST/1IgqaLm6QAOTxtGvxctSNbIR/1hW8yH+bJg=="
1117

1218

tools/azure-sdk-tools/devtools_testutils/proxy_startup.py

Lines changed: 159 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,13 @@
2424
from ci_tools.variables import in_ci
2525

2626
from .config import PROXY_URL
27+
from .fake_credentials import FAKE_ACCESS_TOKEN, FAKE_ID, SERVICEBUS_FAKE_SAS
2728
from .helpers import get_http_client, is_live_and_not_recording
28-
from .sanitizers import add_oauth_response_sanitizer, add_remove_header_sanitizer, set_custom_default_matcher
29+
from .sanitizers import (
30+
add_batch_sanitizers,
31+
Sanitizer,
32+
set_custom_default_matcher,
33+
)
2934

3035

3136
load_dotenv(find_dotenv())
@@ -280,6 +285,158 @@ def prepare_local_tool(repo_root: str) -> str:
280285
)
281286

282287

288+
def set_common_sanitizers() -> None:
    """Register sanitizers that will apply to all recordings throughout the SDK.

    Sets a custom default matcher that excludes a fixed set of headers, then builds a dictionary keyed by
    :class:`Sanitizer` member whose values are lists of sanitizer keyword arguments, and registers all of them
    through a single call to :func:`add_batch_sanitizers`.
    """
    SANITIZED = "Sanitized"
    batch_sanitizers = {}

    # Remove headers from recordings if we don't need them, and ignore them if present
    # Authorization, for example, can contain sensitive info and can cause matching failures during challenge auth
    headers_to_ignore = "Authorization, x-ms-client-request-id, x-ms-request-id"
    set_custom_default_matcher(excluded_headers=headers_to_ignore)
    batch_sanitizers[Sanitizer.REMOVE_HEADER] = [{"headers": headers_to_ignore}]

    # Remove OAuth interactions, which can contain client secrets and aren't necessary for playback testing
    # A `None` entry registers the sanitizer with no constructor arguments
    batch_sanitizers[Sanitizer.OAUTH_RESPONSE] = [None]

    # Body key sanitizers for sensitive fields in JSON requests/responses
    batch_sanitizers[Sanitizer.BODY_KEY] = [
        {"json_path": "$..access_token", "value": FAKE_ACCESS_TOKEN},
        {"json_path": "$..AccessToken", "value": FAKE_ACCESS_TOKEN},
        {"json_path": "$..targetModelLocation", "value": SANITIZED},
        {"json_path": "$..targetResourceId", "value": SANITIZED},
        {"json_path": "$..urlSource", "value": SANITIZED},
        {"json_path": "$..azureBlobSource.containerUrl", "value": SANITIZED},
        {"json_path": "$..source", "value": SANITIZED},
        {"json_path": "$..resourceLocation", "value": SANITIZED},
        # NOTE(review): bare key rather than a "$.." path like its neighbours -- confirm this is intentional
        {"json_path": "Location", "value": SANITIZED},
        {"json_path": "$..to", "value": SANITIZED},
        {"json_path": "$..from", "value": SANITIZED},
        {"json_path": "$..sasUri", "value": SANITIZED},
        {"json_path": "$..containerUri", "value": SANITIZED},
        {"json_path": "$..inputDataUri", "value": SANITIZED},
        {"json_path": "$..outputDataUri", "value": SANITIZED},
        {"json_path": "$..id", "value": SANITIZED},
        {"json_path": "$..token", "value": SANITIZED},
        {"json_path": "$..appId", "value": SANITIZED},
        {"json_path": "$..userId", "value": SANITIZED},
        {"json_path": "$..storageAccount", "value": SANITIZED},
        {"json_path": "$..resourceGroup", "value": SANITIZED},
        {"json_path": "$..guardian", "value": SANITIZED},
        {"json_path": "$..scan", "value": SANITIZED},
        {"json_path": "$..catalog", "value": SANITIZED},
        {"json_path": "$..lastModifiedBy", "value": SANITIZED},
        {"json_path": "$..managedResourceGroupName", "value": SANITIZED},
        {"json_path": "$..friendlyName", "value": SANITIZED},
        {"json_path": "$..createdBy", "value": SANITIZED},
        {"json_path": "$..credential", "value": SANITIZED},
        {"json_path": "$..aliasPrimaryConnectionString", "value": SANITIZED},
        {"json_path": "$..aliasSecondaryConnectionString", "value": SANITIZED},
        {"json_path": "$..connectionString", "value": SANITIZED},
        {"json_path": "$..primaryConnectionString", "value": SANITIZED},
        {"json_path": "$..secondaryConnectionString", "value": SANITIZED},
        {"json_path": "$..sshPassword", "value": SANITIZED},
        {"json_path": "$..primaryKey", "value": SANITIZED},
        {"json_path": "$..secondaryKey", "value": SANITIZED},
        {"json_path": "$..runAsPassword", "value": SANITIZED},
        {"json_path": "$..adminPassword", "value": SANITIZED},
        {"json_path": "$..adminPassword.value", "value": SANITIZED},
        {"json_path": "$..administratorLoginPassword", "value": SANITIZED},
        {"json_path": "$..accessSAS", "value": SANITIZED},
        {"json_path": "$..WEBSITE_AUTH_ENCRYPTION_KEY", "value": SANITIZED},
        {"json_path": "$..storageContainerWriteSas", "value": SANITIZED},
        {"json_path": "$..storageContainerUri", "value": SANITIZED},
        {"json_path": "$..storageContainerReadListSas", "value": SANITIZED},
        {"json_path": "$..storageAccountPrimaryKey", "value": SANITIZED},
        {"json_path": "$..uploadUrl", "value": SANITIZED},
        {"json_path": "$..secondaryReadonlyMasterKey", "value": SANITIZED},
        {"json_path": "$..primaryMasterKey", "value": SANITIZED},
        {"json_path": "$..primaryReadonlyMasterKey", "value": SANITIZED},
        {"json_path": "$..secondaryMasterKey", "value": SANITIZED},
        {"json_path": "$..scriptUrlSasToken", "value": SANITIZED},
        {"json_path": "$..privateKey", "value": SANITIZED},
        {"json_path": "$..password", "value": SANITIZED},
        {"json_path": "$..logLink", "value": SANITIZED},
        {"json_path": "$..keyVaultClientSecret", "value": SANITIZED},
        {"json_path": "$..httpHeader", "value": SANITIZED},
        {"json_path": "$..functionKey", "value": SANITIZED},
        {"json_path": "$..fencingClientPassword", "value": SANITIZED},
        {"json_path": "$..encryptedCredential", "value": SANITIZED},
        {"json_path": "$..clientSecret", "value": SANITIZED},
        {"json_path": "$..certificatePassword", "value": SANITIZED},
        {"json_path": "$..authHeader", "value": SANITIZED},
        {"json_path": "$..atlasKafkaSecondaryEndpoint", "value": SANITIZED},
        {"json_path": "$..atlasKafkaPrimaryEndpoint", "value": SANITIZED},
        {"json_path": "$..appkey", "value": SANITIZED},
        {"json_path": "$..acrToken", "value": SANITIZED},
        {"json_path": "$..accountKey", "value": SANITIZED},
        {"json_path": "$..accountName", "value": SANITIZED},
        {"json_path": "$..decryptionKey", "value": SANITIZED},
        {"json_path": "$..applicationId", "value": SANITIZED},
        {"json_path": "$..apiKey", "value": SANITIZED},
        {"json_path": "$..userName", "value": SANITIZED},
        {"json_path": "$.properties.DOCKER_REGISTRY_SERVER_PASSWORD", "value": SANITIZED},
        {"json_path": "$.value[*].key", "value": SANITIZED},
        {"json_path": "$.key", "value": SANITIZED},
        {"json_path": "$..clientId", "value": FAKE_ID},
        {"json_path": "$..principalId", "value": FAKE_ID},
        {"json_path": "$..tenantId", "value": FAKE_ID},
    ]

    # Body regex sanitizers for sensitive patterns in request/response bodies
    batch_sanitizers[Sanitizer.BODY_REGEX] = [
        {"regex": "(client_id=)[^&]+", "value": "$1sanitized"},
        {"regex": "(client_secret=)[^&]+", "value": "$1sanitized"},
        {"regex": "(client_assertion=)[^&]+", "value": "$1sanitized"},
        # NOTE(review): names a `secret` group but passes no group_for_replace, unlike the entries below -- confirm
        {"regex": "(?:(sv|sig|se|srt|ss|sp)=)(?<secret>(([^&\\s]*)))", "value": SANITIZED},
        {"regex": "refresh_token=(?<group>.*?)(?=&|$)", "group_for_replace": "group", "value": SANITIZED},
        {"regex": "access_token=(?<group>.*?)(?=&|$)", "group_for_replace": "group", "value": SANITIZED},
        {"regex": "token=(?<token>[^\\u0026]+)($|\\u0026)", "group_for_replace": "token", "value": SANITIZED},
        {"regex": "-----BEGIN PRIVATE KEY-----\\n(.+\\n)*-----END PRIVATE KEY-----\\n", "value": SANITIZED},
        {"regex": "(?<=<UserDelegationKey>).*?(?:<SignedTid>)(.*)(?:</SignedTid>)", "value": SANITIZED},
        {"regex": "(?<=<UserDelegationKey>).*?(?:<SignedOid>)(.*)(?:</SignedOid>)", "value": SANITIZED},
        {"regex": "(?<=<UserDelegationKey>).*?(?:<Value>)(.*)(?:</Value>)", "value": SANITIZED},
        {"regex": "(?:Password=)(.*?)(?:;)", "value": SANITIZED},
        {"regex": "(?:User ID=)(.*?)(?:;)", "value": SANITIZED},
        {"regex": "(?:<PrimaryKey>)(.*)(?:</PrimaryKey>)", "value": SANITIZED},
        {"regex": "(?:<SecondaryKey>)(.*)(?:</SecondaryKey>)", "value": SANITIZED},
    ]

    # General regex sanitizers for sensitive patterns throughout interactions
    batch_sanitizers[Sanitizer.GENERAL_REGEX] = [
        {"regex": "SharedAccessKey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED},
        {"regex": "AccountKey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED},
        {"regex": "accesskey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED},
        {"regex": "Accesskey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED},
        {"regex": "Secret=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED},
    ]

    # Header regex sanitizers for sensitive patterns in request/response headers
    batch_sanitizers[Sanitizer.HEADER_REGEX] = [
        {"key": "subscription-key", "value": SANITIZED},
        {"key": "x-ms-encryption-key", "value": SANITIZED},
        {"key": "x-ms-rename-source", "value": SANITIZED},
        {"key": "x-ms-file-rename-source", "value": SANITIZED},
        {"key": "x-ms-copy-source", "value": SANITIZED},
        {"key": "x-ms-copy-source-authorization", "value": SANITIZED},
        {"key": "x-ms-file-rename-source-authorization", "value": SANITIZED},
        {"key": "x-ms-encryption-key-sha256", "value": SANITIZED},
        {"key": "api-key", "value": SANITIZED},
        {"key": "aeg-sas-token", "value": SANITIZED},
        {"key": "aeg-sas-key", "value": SANITIZED},
        {"key": "aeg-channel-name", "value": SANITIZED},
        {"key": "SupplementaryAuthorization", "value": SERVICEBUS_FAKE_SAS},
    ]

    # URI regex sanitizers for sensitive patterns in request/response URLs
    batch_sanitizers[Sanitizer.URI_REGEX] = [
        {"regex": "sig=(?<sig>[^&]+)", "group_for_replace": "sig", "value": SANITIZED}
    ]

    # Send all the above sanitizers to the test proxy in a single, batch request
    add_batch_sanitizers(sanitizers=batch_sanitizers)
438+
439+
283440
def start_test_proxy(request) -> None:
284441
"""Starts the test proxy and returns when the proxy server is ready to receive requests.
285442
@@ -334,12 +491,7 @@ def start_test_proxy(request) -> None:
334491

335492
# Wait for the proxy server to become available
336493
check_proxy_availability()
337-
# Remove headers from recordings if we don't need them, and ignore them if present
338-
# Authorization, for example, can contain sensitive info and can cause matching failures during challenge auth
339-
headers_to_ignore = "Authorization, x-ms-client-request-id, x-ms-request-id"
340-
add_remove_header_sanitizer(headers=headers_to_ignore)
341-
set_custom_default_matcher(excluded_headers=headers_to_ignore)
342-
add_oauth_response_sanitizer()
494+
set_common_sanitizers()
343495

344496

345497
def stop_test_proxy() -> None:

tools/azure-sdk-tools/devtools_testutils/sanitizers.py

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,30 @@
33
# Licensed under the MIT License. See License.txt in the project root for
44
# license information.
55
# --------------------------------------------------------------------------
6+
from enum import Enum
67
import json
7-
from typing import TYPE_CHECKING
8+
from typing import Dict, List, Optional
89

910
from .config import PROXY_URL
1011
from .helpers import get_http_client, get_recording_id, is_live, is_live_and_not_recording
1112

12-
if TYPE_CHECKING:
13-
from typing import Optional
13+
14+
class Sanitizer(str, Enum):
    """Sanitizers that can be applied to recordings.

    Each member's value is the sanitizer name string that is sent to the test proxy. Mixing in ``str`` makes every
    member a string equal to its value, so members can be placed directly into JSON request payloads.
    """

    BODY_KEY = "BodyKeySanitizer"
    BODY_REGEX = "BodyRegexSanitizer"
    BODY_STRING = "BodyStringSanitizer"
    CONTINUATION = "ContinuationSanitizer"
    GENERAL_REGEX = "GeneralRegexSanitizer"
    GENERAL_STRING = "GeneralStringSanitizer"
    HEADER_REGEX = "HeaderRegexSanitizer"
    HEADER_STRING = "HeaderStringSanitizer"
    OAUTH_RESPONSE = "OAuthResponseSanitizer"
    REMOVE_HEADER = "RemoveHeaderSanitizer"
    URI_REGEX = "UriRegexSanitizer"
    URI_STRING = "UriStringSanitizer"
    URI_SUBSCRIPTION_ID = "UriSubscriptionIdSanitizer"
1430

1531

1632
# This file contains methods for adjusting many aspects of test proxy behavior:
@@ -416,6 +432,46 @@ def add_uri_subscription_id_sanitizer(**kwargs) -> None:
416432
_send_sanitizer_request("UriSubscriptionIdSanitizer", request_args, {"x-recording-id": x_recording_id})
417433

418434

435+
def add_batch_sanitizers(sanitizers: Dict[str, List[Optional[Dict[str, str]]]], headers: Optional[Dict] = None) -> None:
    """Registers a batch of sanitizers at once.

    If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
    request will be sent.

    :param sanitizers: A group of sanitizers to add, as a dictionary. Keys should be sanitizer names (from the Sanitizer
        enum) and values should be lists containing dictionaries of sanitizer constructor parameters. The parameters
        should be formatted as key-value pairs aligning with keyword-only arguments to sanitizer methods. A falsy
        list entry (``None`` or an empty dictionary) registers the sanitizer without a request body.
    :type sanitizers: dict[str, list[Optional[dict]]]
    :param headers: Optional extra headers to send with the request. Entries whose value is ``None`` are skipped.
        ``Content-Type: application/json`` is set first, so a ``Content-Type`` key provided here replaces it.
    :type headers: Optional[dict]
    """

    if is_live_and_not_recording():
        return

    data = []  # Body content to populate with multiple sanitizer definitions

    for sanitizer_name, sanitizer_instances in sanitizers.items():
        # Iterate over each instance of the particular sanitizer (e.g. each body regex sanitizer)
        for sanitizer_instance in sanitizer_instances:
            sanitizer_definition = {"Name": sanitizer_name}
            if sanitizer_instance:
                sanitizer_definition["Body"] = _get_request_args(**sanitizer_instance)
            data.append(sanitizer_definition)

    headers_to_send = {"Content-Type": "application/json"}
    if headers is not None:
        headers_to_send.update({key: value for key, value in headers.items() if value is not None})

    http_client = get_http_client()
    http_client.request(
        method="POST",
        url=f"{PROXY_URL}/Admin/AddSanitizers",
        headers=headers_to_send,
        body=json.dumps(data).encode("utf-8"),
    )
473+
474+
419475
# ----------TRANSFORMS----------
420476
#
421477
# A transform extends functionality of the test proxy by applying to responses just before they are returned during
@@ -538,7 +594,7 @@ def __init__(self, data: str, key: str) -> None:
538594
# ----------HELPERS----------
539595

540596

541-
def _get_recording_option_args(**kwargs) -> dict:
597+
def _get_recording_option_args(**kwargs) -> Dict:
542598
"""Returns a dictionary of recording option request arguments, formatted for test proxy consumption."""
543599

544600
certificates = kwargs.pop("certificates", None)
@@ -564,7 +620,7 @@ def _get_recording_option_args(**kwargs) -> dict:
564620
return request_args
565621

566622

567-
def _get_request_args(**kwargs) -> dict:
623+
def _get_request_args(**kwargs) -> Dict:
568624
"""Returns a dictionary of request arguments, formatted for test proxy consumption."""
569625

570626
request_args = {}
@@ -605,7 +661,7 @@ def _get_request_args(**kwargs) -> dict:
605661
return request_args
606662

607663

608-
def _send_matcher_request(matcher: str, headers: dict, parameters: "Optional[dict]" = None) -> None:
664+
def _send_matcher_request(matcher: str, headers: Dict, parameters: Optional[Dict] = None) -> None:
609665
"""Sends a POST request to the test proxy endpoint to register the specified matcher.
610666
611667
If live tests are being run, no request will be sent.
@@ -633,7 +689,7 @@ def _send_matcher_request(matcher: str, headers: dict, parameters: "Optional[dic
633689
)
634690

635691

636-
def _send_recording_options_request(parameters: dict, headers: "Optional[dict]" = None) -> None:
692+
def _send_recording_options_request(parameters: Dict, headers: Optional[Dict] = None) -> None:
637693
"""Sends a POST request to the test proxy endpoint to set the specified recording options.
638694
639695
If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
@@ -661,7 +717,7 @@ def _send_recording_options_request(parameters: dict, headers: "Optional[dict]"
661717
)
662718

663719

664-
def _send_reset_request(headers: dict) -> None:
720+
def _send_reset_request(headers: Dict) -> None:
665721
"""Sends a POST request to the test proxy endpoint to reset setting customizations.
666722
667723
If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
@@ -682,7 +738,7 @@ def _send_reset_request(headers: dict) -> None:
682738
http_client.request(method="POST", url=f"{PROXY_URL}/Admin/Reset", headers=headers_to_send)
683739

684740

685-
def _send_sanitizer_request(sanitizer: str, parameters: dict, headers: "Optional[dict]" = None) -> None:
741+
def _send_sanitizer_request(sanitizer: str, parameters: Dict, headers: Optional[Dict] = None) -> None:
686742
"""Sends a POST request to the test proxy endpoint to register the specified sanitizer.
687743
688744
If live tests are being run with recording turned off via the AZURE_SKIP_LIVE_RECORDING environment variable, no
@@ -709,7 +765,7 @@ def _send_sanitizer_request(sanitizer: str, parameters: dict, headers: "Optional
709765
)
710766

711767

712-
def _send_transform_request(transform: str, parameters: dict, headers: "Optional[dict]" = None) -> None:
768+
def _send_transform_request(transform: str, parameters: Dict, headers: Optional[Dict] = None) -> None:
713769
"""Sends a POST request to the test proxy endpoint to register the specified transform.
714770
715771
If live tests are being run, no request will be sent.

0 commit comments

Comments
 (0)