|
24 | 24 | from ci_tools.variables import in_ci |
25 | 25 |
|
26 | 26 | from .config import PROXY_URL |
| 27 | +from .fake_credentials import FAKE_ACCESS_TOKEN, FAKE_ID, SERVICEBUS_FAKE_SAS |
27 | 28 | from .helpers import get_http_client, is_live_and_not_recording |
28 | | -from .sanitizers import add_oauth_response_sanitizer, add_remove_header_sanitizer, set_custom_default_matcher |
| 29 | +from .sanitizers import ( |
| 30 | + add_batch_sanitizers, |
| 31 | + Sanitizer, |
| 32 | + set_custom_default_matcher, |
| 33 | +) |
29 | 34 |
|
30 | 35 |
|
31 | 36 | load_dotenv(find_dotenv()) |
@@ -280,6 +285,158 @@ def prepare_local_tool(repo_root: str) -> str: |
280 | 285 | ) |
281 | 286 |
|
282 | 287 |
|
| 288 | +def set_common_sanitizers() -> None: |
| 289 | + """Register sanitizers that will apply to all recordings throughout the SDK.""" |
| 290 | + SANITIZED = "Sanitized" |
| 291 | + batch_sanitizers = {} |
| 292 | + |
| 293 | + # Remove headers from recordings if we don't need them, and ignore them if present |
| 294 | + # Authorization, for example, can contain sensitive info and can cause matching failures during challenge auth |
| 295 | + headers_to_ignore = "Authorization, x-ms-client-request-id, x-ms-request-id" |
| 296 | + set_custom_default_matcher(excluded_headers=headers_to_ignore) |
| 297 | + batch_sanitizers[Sanitizer.REMOVE_HEADER] = [{"headers": headers_to_ignore}] |
| 298 | + |
| 299 | + # Remove OAuth interactions, which can contain client secrets and aren't necessary for playback testing |
| 300 | + batch_sanitizers[Sanitizer.OAUTH_RESPONSE] = [None] |
| 301 | + |
| 302 | + # Body key sanitizers for sensitive fields in JSON requests/responses |
| 303 | + batch_sanitizers[Sanitizer.BODY_KEY] = [ |
| 304 | + {"json_path": "$..access_token", "value": FAKE_ACCESS_TOKEN}, |
| 305 | + {"json_path": "$..AccessToken", "value": FAKE_ACCESS_TOKEN}, |
| 306 | + {"json_path": "$..targetModelLocation", "value": SANITIZED}, |
| 307 | + {"json_path": "$..targetResourceId", "value": SANITIZED}, |
| 308 | + {"json_path": "$..urlSource", "value": SANITIZED}, |
| 309 | + {"json_path": "$..azureBlobSource.containerUrl", "value": SANITIZED}, |
| 310 | + {"json_path": "$..source", "value": SANITIZED}, |
| 311 | + {"json_path": "$..resourceLocation", "value": SANITIZED}, |
| 312 | + {"json_path": "Location", "value": SANITIZED}, |
| 313 | + {"json_path": "$..to", "value": SANITIZED}, |
| 314 | + {"json_path": "$..from", "value": SANITIZED}, |
| 315 | + {"json_path": "$..sasUri", "value": SANITIZED}, |
| 316 | + {"json_path": "$..containerUri", "value": SANITIZED}, |
| 317 | + {"json_path": "$..inputDataUri", "value": SANITIZED}, |
| 318 | + {"json_path": "$..outputDataUri", "value": SANITIZED}, |
| 319 | + {"json_path": "$..id", "value": SANITIZED}, |
| 320 | + {"json_path": "$..token", "value": SANITIZED}, |
| 321 | + {"json_path": "$..appId", "value": SANITIZED}, |
| 322 | + {"json_path": "$..userId", "value": SANITIZED}, |
| 323 | + {"json_path": "$..storageAccount", "value": SANITIZED}, |
| 324 | + {"json_path": "$..resourceGroup", "value": SANITIZED}, |
| 325 | + {"json_path": "$..guardian", "value": SANITIZED}, |
| 326 | + {"json_path": "$..scan", "value": SANITIZED}, |
| 327 | + {"json_path": "$..catalog", "value": SANITIZED}, |
| 328 | + {"json_path": "$..lastModifiedBy", "value": SANITIZED}, |
| 329 | + {"json_path": "$..managedResourceGroupName", "value": SANITIZED}, |
| 330 | + {"json_path": "$..friendlyName", "value": SANITIZED}, |
| 331 | + {"json_path": "$..createdBy", "value": SANITIZED}, |
| 332 | + {"json_path": "$..credential", "value": SANITIZED}, |
| 333 | + {"json_path": "$..aliasPrimaryConnectionString", "value": SANITIZED}, |
| 334 | + {"json_path": "$..aliasSecondaryConnectionString", "value": SANITIZED}, |
| 335 | + {"json_path": "$..connectionString", "value": SANITIZED}, |
| 336 | + {"json_path": "$..primaryConnectionString", "value": SANITIZED}, |
| 337 | + {"json_path": "$..secondaryConnectionString", "value": SANITIZED}, |
| 338 | + {"json_path": "$..sshPassword", "value": SANITIZED}, |
| 339 | + {"json_path": "$..primaryKey", "value": SANITIZED}, |
| 340 | + {"json_path": "$..secondaryKey", "value": SANITIZED}, |
| 341 | + {"json_path": "$..runAsPassword", "value": SANITIZED}, |
| 342 | + {"json_path": "$..adminPassword", "value": SANITIZED}, |
| 343 | + {"json_path": "$..adminPassword.value", "value": SANITIZED}, |
| 344 | + {"json_path": "$..administratorLoginPassword", "value": SANITIZED}, |
| 345 | + {"json_path": "$..accessSAS", "value": SANITIZED}, |
| 346 | + {"json_path": "$..WEBSITE_AUTH_ENCRYPTION_KEY", "value": SANITIZED}, |
| 347 | + {"json_path": "$..storageContainerWriteSas", "value": SANITIZED}, |
| 348 | + {"json_path": "$..storageContainerUri", "value": SANITIZED}, |
| 349 | + {"json_path": "$..storageContainerReadListSas", "value": SANITIZED}, |
| 350 | + {"json_path": "$..storageAccountPrimaryKey", "value": SANITIZED}, |
| 351 | + {"json_path": "$..uploadUrl", "value": SANITIZED}, |
| 352 | + {"json_path": "$..secondaryReadonlyMasterKey", "value": SANITIZED}, |
| 353 | + {"json_path": "$..primaryMasterKey", "value": SANITIZED}, |
| 354 | + {"json_path": "$..primaryReadonlyMasterKey", "value": SANITIZED}, |
| 355 | + {"json_path": "$..secondaryMasterKey", "value": SANITIZED}, |
| 356 | + {"json_path": "$..scriptUrlSasToken", "value": SANITIZED}, |
| 357 | + {"json_path": "$..privateKey", "value": SANITIZED}, |
| 358 | + {"json_path": "$..password", "value": SANITIZED}, |
| 359 | + {"json_path": "$..logLink", "value": SANITIZED}, |
| 360 | + {"json_path": "$..keyVaultClientSecret", "value": SANITIZED}, |
| 361 | + {"json_path": "$..httpHeader", "value": SANITIZED}, |
| 362 | + {"json_path": "$..functionKey", "value": SANITIZED}, |
| 363 | + {"json_path": "$..fencingClientPassword", "value": SANITIZED}, |
| 364 | + {"json_path": "$..encryptedCredential", "value": SANITIZED}, |
| 365 | + {"json_path": "$..clientSecret", "value": SANITIZED}, |
| 366 | + {"json_path": "$..certificatePassword", "value": SANITIZED}, |
| 367 | + {"json_path": "$..authHeader", "value": SANITIZED}, |
| 368 | + {"json_path": "$..atlasKafkaSecondaryEndpoint", "value": SANITIZED}, |
| 369 | + {"json_path": "$..atlasKafkaPrimaryEndpoint", "value": SANITIZED}, |
| 370 | + {"json_path": "$..appkey", "value": SANITIZED}, |
| 371 | + {"json_path": "$..acrToken", "value": SANITIZED}, |
| 372 | + {"json_path": "$..accountKey", "value": SANITIZED}, |
| 373 | + {"json_path": "$..accountName", "value": SANITIZED}, |
| 374 | + {"json_path": "$..decryptionKey", "value": SANITIZED}, |
| 375 | + {"json_path": "$..applicationId", "value": SANITIZED}, |
| 376 | + {"json_path": "$..apiKey", "value": SANITIZED}, |
| 377 | + {"json_path": "$..userName", "value": SANITIZED}, |
| 378 | + {"json_path": "$.properties.DOCKER_REGISTRY_SERVER_PASSWORD", "value": SANITIZED}, |
| 379 | + {"json_path": "$.value[*].key", "value": SANITIZED}, |
| 380 | + {"json_path": "$.key", "value": SANITIZED}, |
| 381 | + {"json_path": "$..clientId", "value": FAKE_ID}, |
| 382 | + {"json_path": "$..principalId", "value": FAKE_ID}, |
| 383 | + {"json_path": "$..tenantId", "value": FAKE_ID}, |
| 384 | + ] |
| 385 | + |
| 386 | + # Body regex sanitizers for sensitive patterns in request/response bodies |
| 387 | + batch_sanitizers[Sanitizer.BODY_REGEX] = [ |
| 388 | + {"regex": "(client_id=)[^&]+", "value": "$1sanitized"}, |
| 389 | + {"regex": "(client_secret=)[^&]+", "value": "$1sanitized"}, |
| 390 | + {"regex": "(client_assertion=)[^&]+", "value": "$1sanitized"}, |
| 391 | + {"regex": "(?:(sv|sig|se|srt|ss|sp)=)(?<secret>(([^&\\s]*)))", "value": SANITIZED}, |
| 392 | + {"regex": "refresh_token=(?<group>.*?)(?=&|$)", "group_for_replace": "group", "value": SANITIZED}, |
| 393 | + {"regex": "access_token=(?<group>.*?)(?=&|$)", "group_for_replace": "group", "value": SANITIZED}, |
| 394 | + {"regex": "token=(?<token>[^\\u0026]+)($|\\u0026)", "group_for_replace": "token", "value": SANITIZED}, |
| 395 | + {"regex": "-----BEGIN PRIVATE KEY-----\\n(.+\\n)*-----END PRIVATE KEY-----\\n", "value": SANITIZED}, |
| 396 | + {"regex": "(?<=<UserDelegationKey>).*?(?:<SignedTid>)(.*)(?:</SignedTid>)", "value": SANITIZED}, |
| 397 | + {"regex": "(?<=<UserDelegationKey>).*?(?:<SignedOid>)(.*)(?:</SignedOid>)", "value": SANITIZED}, |
| 398 | + {"regex": "(?<=<UserDelegationKey>).*?(?:<Value>)(.*)(?:</Value>)", "value": SANITIZED}, |
| 399 | + {"regex": "(?:Password=)(.*?)(?:;)", "value": SANITIZED}, |
| 400 | + {"regex": "(?:User ID=)(.*?)(?:;)", "value": SANITIZED}, |
| 401 | + {"regex": "(?:<PrimaryKey>)(.*)(?:</PrimaryKey>)", "value": SANITIZED}, |
| 402 | + {"regex": "(?:<SecondaryKey>)(.*)(?:</SecondaryKey>)", "value": SANITIZED}, |
| 403 | + ] |
| 404 | + |
| 405 | + # General regex sanitizers for sensitive patterns throughout interactions |
| 406 | + batch_sanitizers[Sanitizer.GENERAL_REGEX] = [ |
| 407 | + {"regex": "SharedAccessKey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED}, |
| 408 | + {"regex": "AccountKey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED}, |
| 409 | + {"regex": "accesskey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED}, |
| 410 | + {"regex": "Accesskey=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED}, |
| 411 | + {"regex": "Secret=(?<key>[^;\\\"]+)", "group_for_replace": "key", "value": SANITIZED}, |
| 412 | + ] |
| 413 | + |
| 414 | + # Header regex sanitizers for sensitive patterns in request/response headers |
| 415 | + batch_sanitizers[Sanitizer.HEADER_REGEX] = [ |
| 416 | + {"key": "subscription-key", "value": SANITIZED}, |
| 417 | + {"key": "x-ms-encryption-key", "value": SANITIZED}, |
| 418 | + {"key": "x-ms-rename-source", "value": SANITIZED}, |
| 419 | + {"key": "x-ms-file-rename-source", "value": SANITIZED}, |
| 420 | + {"key": "x-ms-copy-source", "value": SANITIZED}, |
| 421 | + {"key": "x-ms-copy-source-authorization", "value": SANITIZED}, |
| 422 | + {"key": "x-ms-file-rename-source-authorization", "value": SANITIZED}, |
| 423 | + {"key": "x-ms-encryption-key-sha256", "value": SANITIZED}, |
| 424 | + {"key": "api-key", "value": SANITIZED}, |
| 425 | + {"key": "aeg-sas-token", "value": SANITIZED}, |
| 426 | + {"key": "aeg-sas-key", "value": SANITIZED}, |
| 427 | + {"key": "aeg-channel-name", "value": SANITIZED}, |
| 428 | + {"key": "SupplementaryAuthorization", "value": SERVICEBUS_FAKE_SAS}, |
| 429 | + ] |
| 430 | + |
| 431 | + # URI regex sanitizers for sensitive patterns in request/response URLs |
| 432 | + batch_sanitizers[Sanitizer.URI_REGEX] = [ |
| 433 | + {"regex": "sig=(?<sig>[^&]+)", "group_for_replace": "sig", "value": SANITIZED} |
| 434 | + ] |
| 435 | + |
| 436 | + # Send all the above sanitizers to the test proxy in a single, batch request |
| 437 | + add_batch_sanitizers(sanitizers=batch_sanitizers) |
| 438 | + |
| 439 | + |
283 | 440 | def start_test_proxy(request) -> None: |
284 | 441 | """Starts the test proxy and returns when the proxy server is ready to receive requests. |
285 | 442 |
|
@@ -334,12 +491,7 @@ def start_test_proxy(request) -> None: |
334 | 491 |
|
335 | 492 | # Wait for the proxy server to become available |
336 | 493 | check_proxy_availability() |
337 | | - # Remove headers from recordings if we don't need them, and ignore them if present |
338 | | - # Authorization, for example, can contain sensitive info and can cause matching failures during challenge auth |
339 | | - headers_to_ignore = "Authorization, x-ms-client-request-id, x-ms-request-id" |
340 | | - add_remove_header_sanitizer(headers=headers_to_ignore) |
341 | | - set_custom_default_matcher(excluded_headers=headers_to_ignore) |
342 | | - add_oauth_response_sanitizer() |
| 494 | + set_common_sanitizers() |
343 | 495 |
|
344 | 496 |
|
345 | 497 | def stop_test_proxy() -> None: |
|
0 commit comments