Skip to content

fix: censoring sensitive values from being logged #4818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 5, 2024
27 changes: 21 additions & 6 deletions src/sagemaker/config/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import logging
import sys
from typing import Callable
import re
from copy import deepcopy


def get_sagemaker_config_logger():
Expand Down Expand Up @@ -67,6 +69,19 @@
"""
logger = get_sagemaker_config_logger()

source_value_log_copy = deepcopy(source_value)
config_value_log_copy = deepcopy(config_value)

if isinstance(source_value_log_copy, dict):
for key in source_value_log_copy.keys():
if re.search(r'(secret|password|key|token)', key, re.IGNORECASE):
source_value_log_copy[key] = '***'

if isinstance(config_value_log_copy, dict):
for key in config_value_log_copy.keys():
if re.search(r'(secret|password|key|token)', key, re.IGNORECASE):
config_value_log_copy[key] = '***'

if config_value is not None:

if source_value is None:
Expand All @@ -79,7 +94,7 @@
logger.debug(
"Applied value\n config key = %s\n config value that will be used = %s",
config_key_path,
config_value,
config_value_log_copy,
)
else:
logger.info(
Expand All @@ -102,8 +117,8 @@
" source value that will be used = %s"
),
config_key_path,
config_value,
source_value,
config_value_log_copy,
source_value_log_copy,
)
elif source_value is not None and config_value != source_value:
# Sagemaker Config had a value defined that is NOT going to be used
Expand All @@ -117,13 +132,13 @@
" source value that will be used = %s",
),
config_key_path,
config_value,
source_value,
config_value_log_copy,
source_value_log_copy,
)
else:
# nothing was specified in the config and nothing is being automatically applied
logger.debug("Skipped value because no value defined\n config key = %s", config_key_path)


def _log_sagemaker_config_merge(
source_value=None,
Expand Down
86 changes: 86 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
_validate_new_tags,
remove_tag_with_key,
)
from src.sagemaker.config.config_utils import _log_sagemaker_config_single_substitution
from tests.unit.sagemaker.workflow.helpers import CustomStep
from sagemaker.workflow.parameters import ParameterString, ParameterInteger

Expand Down Expand Up @@ -1279,6 +1280,91 @@ def test_resolve_value_from_config():
mock_info_logger.reset_mock()


class TestLogSagemakerConfig(TestCase):

def test_sensitive_info_masking(self):
logger = logging.getLogger('sagemaker.config')
logger.setLevel(logging.DEBUG)

stream_handler = logging.StreamHandler()
logger.addHandler(stream_handler)

# source value is None
with self.assertLogs(logger, level='DEBUG') as log:
_log_sagemaker_config_single_substitution(
None,
{"apiKey": "topsecretkey"},
"config/path"
)

self.assertIn("config value that will be used = {'apiKey': '***'}", log.output[0])

# source value is None and config_value == source_value
with self.assertLogs(logger, level='DEBUG') as log:
_log_sagemaker_config_single_substitution(
{"secretword": "topsecretword"},
{"secretword": "topsecretword"},
"config/path"
)

self.assertIn("Skipped value", log.output[0])
self.assertIn("source value that will be used = {'secretword': '***'}", log.output[0])
self.assertIn("config value = {'secretword': '***'}", log.output[0])

# source value is not None and config_value != source_value
with self.assertLogs(logger, level='DEBUG') as log:
_log_sagemaker_config_single_substitution(
{"password": "supersecretpassword"},
{"apiKey": "topsecretkey"},
"config/path"
)

self.assertIn("Skipped value", log.output[0])
self.assertIn("source value that will be used = {'password': '***'}", log.output[0])
self.assertIn("config value = {'apiKey': '***'}", log.output[0])

def test_non_sensitive_info_masking(self):
logger = logging.getLogger('sagemaker.config')
logger.setLevel(logging.DEBUG)

stream_handler = logging.StreamHandler()
logger.addHandler(stream_handler)

# source value is None
with self.assertLogs(logger, level='DEBUG') as log:
_log_sagemaker_config_single_substitution(
None,
{"username": "randomvalue"},
"config/path"
)

self.assertIn("config value that will be used = {'username': 'randomvalue'}", log.output[0])

# source value is not None and config_value == source_value
with self.assertLogs(logger, level='DEBUG') as log:
_log_sagemaker_config_single_substitution(
{"nonsensitivevalue": "randomvalue"},
{"nonsensitivevalue": "randomvalue"},
"config/path"
)

self.assertIn("Skipped value", log.output[0])
self.assertIn("source value that will be used = {'nonsensitivevalue': 'randomvalue'}", log.output[0])
self.assertIn("config value = {'nonsensitivevalue': 'randomvalue'}", log.output[0])

# source value is not None and config_value != source_value
with self.assertLogs(logger, level='DEBUG') as log:
_log_sagemaker_config_single_substitution(
{"username": "nonsensitiveinfo"},
{"configvalue": "nonsensitivevalue"},
"config/path/non_sensitive"
)

self.assertIn("Skipped value", log.output[0])
self.assertIn("source value that will be used = {'username': 'nonsensitiveinfo'}", log.output[0])
self.assertIn("config value = {'configvalue': 'nonsensitivevalue'}", log.output[0])


def test_get_sagemaker_config_value():
mock_config_logger = Mock()

Expand Down
Loading