Skip to content

Commit dfa933b

Browse files
fix: censoring sensitive values from being logged
1 parent cc7399c commit dfa933b

File tree

10 files changed

+130
-16
lines changed

10 files changed

+130
-16
lines changed

.github/workflows/codeql.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@ jobs:
1111
name: Analyze (${{ matrix.language }})
1212
runs-on: ${{ 'ubuntu-latest' }}
1313
permissions:
14-
# required for all workflows
1514
security-events: write
16-
# required to fetch internal or private CodeQL packs
1715
packages: read
1816

1917
strategy:
@@ -25,14 +23,13 @@ jobs:
2523
build-mode: none
2624
steps:
2725
- name: Checkout repository
28-
uses: actions/checkout@v4
29-
# Initializes the CodeQL tools for scanning.
26+
uses: actions/checkout@6ccd57f4c5d15bdc2fef309bd9fb6cc9db2ef1c6
3027
- name: Initialize CodeQL
31-
uses: github/codeql-action/init@v3
28+
uses: github/codeql-action/init@4b1d7da102ff94aca014c0245062b1a463356d72
3229
with:
3330
languages: ${{ matrix.language }}
3431
build-mode: ${{ matrix.build-mode }}
3532
- name: Perform CodeQL Analysis
36-
uses: github/codeql-action/analyze@v3
33+
uses: github/codeql-action/analyze@4b1d7da102ff94aca014c0245062b1a463356d72
3734
with:
3835
category: "/language:${{matrix.language}}"

src/sagemaker/config/config_utils.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
from collections import deque
1919

2020
import logging
21+
import re
2122
import sys
2223
from typing import Callable
24+
from copy import deepcopy
2325

2426

2527
def get_sagemaker_config_logger():
@@ -67,6 +69,19 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
6769
"""
6870
logger = get_sagemaker_config_logger()
6971

72+
source_value_log_copy = deepcopy(source_value)
73+
config_value_log_copy = deepcopy(config_value)
74+
75+
if isinstance(source_value_log_copy, dict):
76+
for key in source_value_log_copy.keys():
77+
if re.search(r'(secret|password|key|token)', key, re.IGNORECASE):
78+
source_value_log_copy[key] = '***'
79+
80+
if isinstance(config_value_log_copy, dict):
81+
for key in config_value_log_copy.keys():
82+
if re.search(r'(secret|password|key|token)', key, re.IGNORECASE):
83+
config_value_log_copy[key] = '***'
84+
7085
if config_value is not None:
7186

7287
if source_value is None:
@@ -79,7 +94,7 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
7994
logger.debug(
8095
"Applied value\n config key = %s\n config value that will be used = %s",
8196
config_key_path,
82-
config_value,
97+
config_value_log_copy,
8398
)
8499
else:
85100
logger.info(
@@ -102,8 +117,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
102117
" source value that will be used = %s"
103118
),
104119
config_key_path,
105-
config_value,
106-
source_value,
120+
config_value_log_copy,
121+
source_value_log_copy,
107122
)
108123
elif source_value is not None and config_value != source_value:
109124
# Sagemaker Config had a value defined that is NOT going to be used
@@ -117,8 +132,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config
117132
" source value that will be used = %s",
118133
),
119134
config_key_path,
120-
config_value,
121-
source_value,
135+
config_value_log_copy,
136+
source_value_log_copy,
122137
)
123138
else:
124139
# nothing was specified in the config and nothing is being automatically applied

src/sagemaker/fw_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@
152152
"2.1.0",
153153
"2.1.2",
154154
"2.2.0",
155-
"2.3.0",
156155
"2.3.1",
157156
]
158157

src/sagemaker/image_uri_config/pytorch-smp.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"2.1": "2.1.2",
99
"2.2": "2.3.1",
1010
"2.2.0": "2.3.1",
11-
"2.3": "2.4.0"
11+
"2.3.1": "2.4.0"
1212
},
1313
"versions": {
1414
"2.0.1": {

src/sagemaker/image_uri_config/pytorch.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,7 @@
10081008
"us-gov-west-1": "442386744353",
10091009
"us-iso-east-1": "886529160074",
10101010
"us-isob-east-1": "094389454867",
1011+
"us-isof-east-1": "303241398832",
10111012
"us-isof-south-1": "454834333376",
10121013
"us-west-1": "763104351884",
10131014
"us-west-2": "763104351884"
@@ -1052,6 +1053,7 @@
10521053
"us-gov-west-1": "442386744353",
10531054
"us-iso-east-1": "886529160074",
10541055
"us-isob-east-1": "094389454867",
1056+
"us-isof-east-1": "303241398832",
10551057
"us-isof-south-1": "454834333376",
10561058
"us-west-1": "763104351884",
10571059
"us-west-2": "763104351884"
@@ -2331,6 +2333,7 @@
23312333
"us-gov-west-1": "442386744353",
23322334
"us-iso-east-1": "886529160074",
23332335
"us-isob-east-1": "094389454867",
2336+
"us-isof-east-1": "303241398832",
23342337
"us-isof-south-1": "454834333376",
23352338
"us-west-1": "763104351884",
23362339
"us-west-2": "763104351884"
@@ -2419,6 +2422,7 @@
24192422
"us-gov-west-1": "442386744353",
24202423
"us-iso-east-1": "886529160074",
24212424
"us-isob-east-1": "094389454867",
2425+
"us-isof-east-1": "303241398832",
24222426
"us-isof-south-1": "454834333376",
24232427
"us-west-1": "763104351884",
24242428
"us-west-2": "763104351884"

src/sagemaker/image_uri_config/tensorflow.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2140,6 +2140,7 @@
21402140
"us-gov-west-1": "442386744353",
21412141
"us-iso-east-1": "886529160074",
21422142
"us-isob-east-1": "094389454867",
2143+
"us-isof-east-1": "303241398832",
21432144
"us-isof-south-1": "454834333376",
21442145
"us-west-1": "763104351884",
21452146
"us-west-2": "763104351884"
@@ -2181,6 +2182,7 @@
21812182
"us-gov-west-1": "442386744353",
21822183
"us-iso-east-1": "886529160074",
21832184
"us-isob-east-1": "094389454867",
2185+
"us-isof-east-1": "303241398832",
21842186
"us-isof-south-1": "454834333376",
21852187
"us-west-1": "763104351884",
21862188
"us-west-2": "763104351884"
@@ -4354,6 +4356,7 @@
43544356
"us-gov-west-1": "442386744353",
43554357
"us-iso-east-1": "886529160074",
43564358
"us-isob-east-1": "094389454867",
4359+
"us-isof-east-1": "303241398832",
43574360
"us-isof-south-1": "454834333376",
43584361
"us-west-1": "763104351884",
43594362
"us-west-2": "763104351884"

src/sagemaker/session.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8348,7 +8348,9 @@ def _logs_for_job( # noqa: C901 - suppress complexity warning for this method
83488348
"""
83498349
sagemaker_client = sagemaker_session.sagemaker_client
83508350
request_end_time = time.time() + timeout if timeout else None
8351-
description = sagemaker_client.describe_training_job(TrainingJobName=job_name)
8351+
description = _wait_until(
8352+
lambda: sagemaker_client.describe_training_job(TrainingJobName=job_name)
8353+
)
83528354
print(secondary_training_status_message(description, None), end="")
83538355

83548356
instance_count, stream_names, positions, client, log_group, dot, color_wrap = _logs_init(

tests/data/marketplace/iris/scoring_logic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
import re
55
from flask import Flask
6-
from flask import request
6+
from flask import request, escape
77
from joblib import dump, load
88
import numpy as np
99
import os
@@ -106,4 +106,4 @@ def endpoint_invocations():
106106

107107
return response
108108
except Exception as e:
109-
return f"Error during model invocation: {str(e)} for input: {request.get_data()}"
109+
return f"Error during model invocation: {str(e)} for input: {escape(request.get_data())}"

tests/unit/sagemaker/image_uris/test_smp_v2.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def test_smp_v2(load_config):
2727
"torch_distributed": {"enabled": True},
2828
"smdistributed": {"modelparallel": {"enabled": True}},
2929
}
30+
3031
for processor in PROCESSORS:
3132
for version in VERSIONS:
3233
ACCOUNTS = load_config["training"]["versions"][version]["registries"]
@@ -38,6 +39,11 @@ def test_smp_v2(load_config):
3839
if "2.1" in version or "2.2" in version or "2.3" in version:
3940
cuda_vers = "cu121"
4041

42+
if "2.3.1" == version:
43+
py_version = "py311"
44+
45+
print(version, py_version)
46+
4147
uri = image_uris.get_training_image_uri(
4248
region,
4349
framework="pytorch",

tests/unit/test_utils.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import absolute_import
1616

1717
import copy
18+
import logging
1819
import shutil
1920
import tarfile
2021
from datetime import datetime
@@ -59,6 +60,8 @@
5960
_validate_new_tags,
6061
remove_tag_with_key,
6162
)
63+
64+
from src.sagemaker.config.config_utils import _log_sagemaker_config_single_substitution
6265
from tests.unit.sagemaker.workflow.helpers import CustomStep
6366
from sagemaker.workflow.parameters import ParameterString, ParameterInteger
6467

@@ -1279,6 +1282,91 @@ def test_resolve_value_from_config():
12791282
mock_info_logger.reset_mock()
12801283

12811284

1285+
class TestLogSagemakerConfig(TestCase):
1286+
1287+
def test_sensitive_info_masking(self):
1288+
logger = logging.getLogger('sagemaker.config')
1289+
logger.setLevel(logging.DEBUG)
1290+
1291+
stream_handler = logging.StreamHandler()
1292+
logger.addHandler(stream_handler)
1293+
1294+
# source value is None
1295+
with self.assertLogs(logger, level='DEBUG') as log:
1296+
_log_sagemaker_config_single_substitution(
1297+
None,
1298+
{"apiKey": "topsecretkey"},
1299+
"config/path"
1300+
)
1301+
1302+
self.assertIn("config value that will be used = {'apiKey': '***'}", log.output[0])
1303+
1304+
# source value is None and config_value == source_value
1305+
with self.assertLogs(logger, level='DEBUG') as log:
1306+
_log_sagemaker_config_single_substitution(
1307+
{"secretword": "topsecretword"},
1308+
{"secretword": "topsecretword"},
1309+
"config/path"
1310+
)
1311+
1312+
self.assertIn("Skipped value", log.output[0])
1313+
self.assertIn("source value that will be used = {'secretword': '***'}", log.output[0])
1314+
self.assertIn("config value = {'secretword': '***'}", log.output[0])
1315+
1316+
# source value is not None and config_value != source_value
1317+
with self.assertLogs(logger, level='DEBUG') as log:
1318+
_log_sagemaker_config_single_substitution(
1319+
{"password": "supersecretpassword"},
1320+
{"apiKey": "topsecretkey"},
1321+
"config/path"
1322+
)
1323+
1324+
self.assertIn("Skipped value", log.output[0])
1325+
self.assertIn("source value that will be used = {'password': '***'}", log.output[0])
1326+
self.assertIn("config value = {'apiKey': '***'}", log.output[0])
1327+
1328+
def test_non_sensitive_info_masking(self):
1329+
logger = logging.getLogger('sagemaker.config')
1330+
logger.setLevel(logging.DEBUG)
1331+
1332+
stream_handler = logging.StreamHandler()
1333+
logger.addHandler(stream_handler)
1334+
1335+
# source value is None
1336+
with self.assertLogs(logger, level='DEBUG') as log:
1337+
_log_sagemaker_config_single_substitution(
1338+
None,
1339+
{"username": "randomvalue"},
1340+
"config/path"
1341+
)
1342+
1343+
self.assertIn("config value that will be used = {'username': 'randomvalue'}", log.output[0])
1344+
1345+
# source value is not None and config_value == source_value
1346+
with self.assertLogs(logger, level='DEBUG') as log:
1347+
_log_sagemaker_config_single_substitution(
1348+
{"nonsensitivevalue": "randomvalue"},
1349+
{"nonsensitivevalue": "randomvalue"},
1350+
"config/path"
1351+
)
1352+
1353+
self.assertIn("Skipped value", log.output[0])
1354+
self.assertIn("source value that will be used = {'nonsensitivevalue': 'randomvalue'}", log.output[0])
1355+
self.assertIn("config value = {'nonsensitivevalue': 'randomvalue'}", log.output[0])
1356+
1357+
# source value is not None and config_value != source_value
1358+
with self.assertLogs(logger, level='DEBUG') as log:
1359+
_log_sagemaker_config_single_substitution(
1360+
{"username": "nonsensitiveinfo"},
1361+
{"configvalue": "nonsensitivevalue"},
1362+
"config/path/non_sensitive"
1363+
)
1364+
1365+
self.assertIn("Skipped value", log.output[0])
1366+
self.assertIn("source value that will be used = {'username': 'nonsensitiveinfo'}", log.output[0])
1367+
self.assertIn("config value = {'configvalue': 'nonsensitivevalue'}", log.output[0])
1368+
1369+
12821370
def test_get_sagemaker_config_value():
12831371
mock_config_logger = Mock()
12841372

0 commit comments

Comments
 (0)