Skip to content

fix: censoring sensitive values from being logged #4818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 5, 2024
25 changes: 20 additions & 5 deletions src/sagemaker/config/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import logging
import sys
from typing import Callable
import re
from copy import deepcopy


def get_sagemaker_config_logger():
Expand Down Expand Up @@ -67,6 +69,19 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
"""
logger = get_sagemaker_config_logger()

source_value_log_copy = deepcopy(source_value)
config_value_log_copy = deepcopy(config_value)

if isinstance(source_value_log_copy, dict):
for key in source_value_log_copy.keys():
if re.search(r"(secret|password|key|token)", key, re.IGNORECASE):
source_value_log_copy[key] = "***"

if isinstance(config_value_log_copy, dict):
for key in config_value_log_copy.keys():
if re.search(r"(secret|password|key|token)", key, re.IGNORECASE):
config_value_log_copy[key] = "***"

if config_value is not None:

if source_value is None:
Expand All @@ -79,7 +94,7 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
logger.debug(
"Applied value\n config key = %s\n config value that will be used = %s",
config_key_path,
config_value,
config_value_log_copy,
)
else:
logger.info(
Expand All @@ -102,8 +117,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
" source value that will be used = %s"
),
config_key_path,
config_value,
source_value,
config_value_log_copy,
source_value_log_copy,
)
elif source_value is not None and config_value != source_value:
# Sagemaker Config had a value defined that is NOT going to be used
Expand All @@ -117,8 +132,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
" source value that will be used = %s",
),
config_key_path,
config_value,
source_value,
config_value_log_copy,
source_value_log_copy,
)
else:
# nothing was specified in the config and nothing is being automatically applied
Expand Down
83 changes: 83 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from __future__ import absolute_import

import copy
import logging
import shutil
import tarfile
from datetime import datetime
Expand Down Expand Up @@ -59,6 +60,7 @@
_validate_new_tags,
remove_tag_with_key,
)
from src.sagemaker.config.config_utils import _log_sagemaker_config_single_substitution
from tests.unit.sagemaker.workflow.helpers import CustomStep
from sagemaker.workflow.parameters import ParameterString, ParameterInteger

Expand Down Expand Up @@ -1279,6 +1281,87 @@ def test_resolve_value_from_config():
mock_info_logger.reset_mock()


class TestLogSagemakerConfig(TestCase):

def test_sensitive_info_masking(self):
logger = logging.getLogger("sagemaker.config")
logger.setLevel(logging.DEBUG)

stream_handler = logging.StreamHandler()
logger.addHandler(stream_handler)

# source value is None
with self.assertLogs(logger, level="DEBUG") as log:
_log_sagemaker_config_single_substitution(
None, {"apiKey": "topsecretkey"}, "config/path"
)

self.assertIn("config value that will be used = {'apiKey': '***'}", log.output[0])

# source value is None and config_value == source_value
with self.assertLogs(logger, level="DEBUG") as log:
_log_sagemaker_config_single_substitution(
{"secretword": "topsecretword"}, {"secretword": "topsecretword"}, "config/path"
)

self.assertIn("Skipped value", log.output[0])
self.assertIn("source value that will be used = {'secretword': '***'}", log.output[0])
self.assertIn("config value = {'secretword': '***'}", log.output[0])

# source value is not None and config_value != source_value
with self.assertLogs(logger, level="DEBUG") as log:
_log_sagemaker_config_single_substitution(
{"password": "supersecretpassword"}, {"apiKey": "topsecretkey"}, "config/path"
)

self.assertIn("Skipped value", log.output[0])
self.assertIn("source value that will be used = {'password': '***'}", log.output[0])
self.assertIn("config value = {'apiKey': '***'}", log.output[0])

def test_non_sensitive_info_masking(self):
logger = logging.getLogger("sagemaker.config")
logger.setLevel(logging.DEBUG)

stream_handler = logging.StreamHandler()
logger.addHandler(stream_handler)

# source value is None
with self.assertLogs(logger, level="DEBUG") as log:
_log_sagemaker_config_single_substitution(
None, {"username": "randomvalue"}, "config/path"
)

self.assertIn("config value that will be used = {'username': 'randomvalue'}", log.output[0])

# source value is not None and config_value == source_value
with self.assertLogs(logger, level="DEBUG") as log:
_log_sagemaker_config_single_substitution(
{"nonsensitivevalue": "randomvalue"},
{"nonsensitivevalue": "randomvalue"},
"config/path",
)

self.assertIn("Skipped value", log.output[0])
self.assertIn(
"source value that will be used = {'nonsensitivevalue': 'randomvalue'}", log.output[0]
)
self.assertIn("config value = {'nonsensitivevalue': 'randomvalue'}", log.output[0])

# source value is not None and config_value != source_value
with self.assertLogs(logger, level="DEBUG") as log:
_log_sagemaker_config_single_substitution(
{"username": "nonsensitiveinfo"},
{"configvalue": "nonsensitivevalue"},
"config/path/non_sensitive",
)

self.assertIn("Skipped value", log.output[0])
self.assertIn(
"source value that will be used = {'username': 'nonsensitiveinfo'}", log.output[0]
)
self.assertIn("config value = {'configvalue': 'nonsensitivevalue'}", log.output[0])


def test_get_sagemaker_config_value():
mock_config_logger = Mock()

Expand Down