diff --git a/src/sagemaker/config/config_utils.py b/src/sagemaker/config/config_utils.py index 70f2764529..e81a620c9a 100644 --- a/src/sagemaker/config/config_utils.py +++ b/src/sagemaker/config/config_utils.py @@ -20,6 +20,8 @@ import logging import sys from typing import Callable +import re +from copy import deepcopy def get_sagemaker_config_logger(): @@ -67,6 +69,19 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config """ logger = get_sagemaker_config_logger() + source_value_log_copy = deepcopy(source_value) + config_value_log_copy = deepcopy(config_value) + + if isinstance(source_value_log_copy, dict): + for key in source_value_log_copy.keys(): + if re.search(r"(secret|password|key|token)", key, re.IGNORECASE): + source_value_log_copy[key] = "***" + + if isinstance(config_value_log_copy, dict): + for key in config_value_log_copy.keys(): + if re.search(r"(secret|password|key|token)", key, re.IGNORECASE): + config_value_log_copy[key] = "***" + if config_value is not None: if source_value is None: @@ -79,7 +94,7 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config logger.debug( "Applied value\n config key = %s\n config value that will be used = %s", config_key_path, - config_value, + config_value_log_copy, ) else: logger.info( @@ -102,8 +117,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config " source value that will be used = %s" ), config_key_path, - config_value, - source_value, + config_value_log_copy, + source_value_log_copy, ) elif source_value is not None and config_value != source_value: # Sagemaker Config had a value defined that is NOT going to be used @@ -117,8 +132,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config " source value that will be used = %s", ), config_key_path, - config_value, - source_value, + config_value_log_copy, + source_value_log_copy, ) else: # nothing was specified in the config and nothing is being automatically applied diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index deb295e6e1..2f0fd77580 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -15,6 +15,7 @@ from __future__ import absolute_import import copy +import logging import shutil import tarfile from datetime import datetime @@ -59,6 +60,7 @@ _validate_new_tags, remove_tag_with_key, ) +from src.sagemaker.config.config_utils import _log_sagemaker_config_single_substitution from tests.unit.sagemaker.workflow.helpers import CustomStep from sagemaker.workflow.parameters import ParameterString, ParameterInteger @@ -1279,6 +1281,87 @@ def test_resolve_value_from_config(): mock_info_logger.reset_mock() +class TestLogSagemakerConfig(TestCase): + + def test_sensitive_info_masking(self): + logger = logging.getLogger("sagemaker.config") + logger.setLevel(logging.DEBUG) + + stream_handler = logging.StreamHandler() + logger.addHandler(stream_handler) + + # source value is None + with self.assertLogs(logger, level="DEBUG") as log: + _log_sagemaker_config_single_substitution( + None, {"apiKey": "topsecretkey"}, "config/path" + ) + + self.assertIn("config value that will be used = {'apiKey': '***'}", log.output[0]) + + # source value is None and config_value == source_value + with self.assertLogs(logger, level="DEBUG") as log: + _log_sagemaker_config_single_substitution( + {"secretword": "topsecretword"}, {"secretword": "topsecretword"}, "config/path" + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn("source value that will be used = {'secretword': '***'}", log.output[0]) + self.assertIn("config value = {'secretword': '***'}", log.output[0]) + + # source value is not None and config_value != source_value + with self.assertLogs(logger, level="DEBUG") as log: + _log_sagemaker_config_single_substitution( + {"password": "supersecretpassword"}, {"apiKey": "topsecretkey"}, "config/path" + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn("source value that will be used = {'password': '***'}", log.output[0]) + self.assertIn("config value = {'apiKey': '***'}", log.output[0]) + + def test_non_sensitive_info_masking(self): + logger = logging.getLogger("sagemaker.config") + logger.setLevel(logging.DEBUG) + + stream_handler = logging.StreamHandler() + logger.addHandler(stream_handler) + + # source value is None + with self.assertLogs(logger, level="DEBUG") as log: + _log_sagemaker_config_single_substitution( + None, {"username": "randomvalue"}, "config/path" + ) + + self.assertIn("config value that will be used = {'username': 'randomvalue'}", log.output[0]) + + # source value is not None and config_value == source_value + with self.assertLogs(logger, level="DEBUG") as log: + _log_sagemaker_config_single_substitution( + {"nonsensitivevalue": "randomvalue"}, + {"nonsensitivevalue": "randomvalue"}, + "config/path", + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn( + "source value that will be used = {'nonsensitivevalue': 'randomvalue'}", log.output[0] + ) + self.assertIn("config value = {'nonsensitivevalue': 'randomvalue'}", log.output[0]) + + # source value is not None and config_value != source_value + with self.assertLogs(logger, level="DEBUG") as log: + _log_sagemaker_config_single_substitution( + {"username": "nonsensitiveinfo"}, + {"configvalue": "nonsensitivevalue"}, + "config/path/non_sensitive", + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn( + "source value that will be used = {'username': 'nonsensitiveinfo'}", log.output[0] + ) + self.assertIn("config value = {'configvalue': 'nonsensitivevalue'}", log.output[0]) + + def test_get_sagemaker_config_value(): mock_config_logger = Mock()