Skip to content

Commit 84b2d5d

Browse files
SNOW-2011595 Masking filter introduced on library levels (#2253)
1 parent 0eca116 commit 84b2d5d

File tree

14 files changed

+235
-62
lines changed

14 files changed

+235
-62
lines changed

.github/workflows/build_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ on:
2020
description: "Test scenario tags"
2121

2222
concurrency:
23-
# older builds for the same pull request numer or branch should be cancelled
23+
# older builds for the same pull request number or branch should be cancelled
2424
cancel-in-progress: true
2525
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
2626

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
2121
- Added `check_arrow_conversion_error_on_every_column` connection property that can be set to `False` to restore the previous behaviour, in which the driver ignores errors until one occurs in the last column. This flag's purpose is to unblock workflows that may be impacted by the bugfix and will be removed in later releases.
2222
- Lower log levels from info to debug for some of the messages to make the output easier to follow.
2323
- Allow the connector to inherit a UUID4 generated upstream, provided in statement parameters (field: `requestId`), rather than automatically generate a UUID4 to use for the HTTP Request ID.
24+
- Improved logging in urllib3, boto3, botocore - ensured data masking even after a future migration to the externally owned library.
2425
- Fix expired S3 credentials update and increment retry when expired credentials are found.
2526
- Added `client_fetch_threads` experimental parameter to better utilize threads for fetching query results.
2627

src/snowflake/connector/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import logging
1313
from logging import NullHandler
1414

15+
from snowflake.connector.externals_utils.externals_setup import setup_external_libraries
16+
1517
from .connection import SnowflakeConnection
1618
from .cursor import DictCursor
1719
from .dbapi import (
@@ -44,6 +46,7 @@
4446
from .version import VERSION
4547

4648
logging.getLogger(__name__).addHandler(NullHandler())
49+
setup_external_libraries()
4750

4851

4952
@wraps(SnowflakeConnection.__init__)

src/snowflake/connector/azure_storage_client.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import os
66
import xml.etree.ElementTree as ET
77
from datetime import datetime, timezone
8-
from logging import Filter, getLogger
8+
from logging import getLogger
99
from random import choice
1010
from string import hexdigits
1111
from typing import TYPE_CHECKING, Any, NamedTuple
@@ -37,22 +37,6 @@ class AzureLocation(NamedTuple):
3737
MATDESC = "x-ms-meta-matdesc"
3838

3939

40-
class AzureCredentialFilter(Filter):
41-
LEAKY_FMT = '%s://%s:%s "%s %s %s" %s %s'
42-
43-
def filter(self, record):
44-
if record.msg == AzureCredentialFilter.LEAKY_FMT and len(record.args) == 8:
45-
record.args = (
46-
record.args[:4] + (record.args[4].split("?")[0],) + record.args[5:]
47-
)
48-
return True
49-
50-
51-
getLogger("snowflake.connector.vendored.urllib3.connectionpool").addFilter(
52-
AzureCredentialFilter()
53-
)
54-
55-
5640
class SnowflakeAzureRestClient(SnowflakeStorageClient):
5741
def __init__(
5842
self,

src/snowflake/connector/cursor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -910,8 +910,8 @@ def execute(
910910
_exec_async: Whether to execute this query asynchronously.
911911
_no_retry: Whether or not to retry on known errors.
912912
_do_reset: Whether or not the result set needs to be reset before executing query.
913-
_put_callback: Function to which GET command should call back to.
914-
_put_azure_callback: Function to which an Azure GET command should call back to.
913+
_put_callback: Function to which PUT command should call back to.
914+
_put_azure_callback: Function to which an Azure PUT command should call back to.
915915
_put_callback_output_stream: The output stream a PUT command's callback should report on.
916916
_get_callback: Function to which GET command should call back to.
917917
_get_azure_callback: Function to which an Azure GET command should call back to.

src/snowflake/connector/externals_utils/__init__.py

Whitespace-only changes.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from __future__ import annotations
2+
3+
from snowflake.connector.logging_utils.filters import (
4+
SecretMaskingFilter,
5+
add_filter_to_logger_and_children,
6+
)
7+
8+
MODULES_TO_MASK_LOGS_NAMES = [
9+
"snowflake.connector.vendored.urllib3",
10+
"botocore",
11+
"boto3",
12+
]
13+
# TODO: after migration to the external urllib3 from the vendored one (SNOW-2041970),
14+
# we should change filters here immediately to the below module's logger:
15+
# MODULES_TO_MASK_LOGS_NAMES = [ "urllib3", ... ]
16+
17+
18+
def add_filters_to_external_loggers():
    """Install a secret-masking filter on each logger tree named in MODULES_TO_MASK_LOGS_NAMES.

    For every listed module name a fresh SecretMaskingFilter is created and passed,
    together with that name, to add_filter_to_logger_and_children.
    """
    for external_module in MODULES_TO_MASK_LOGS_NAMES:
        masking_filter = SecretMaskingFilter()
        add_filter_to_logger_and_children(external_module, masking_filter)
21+
22+
23+
def setup_external_libraries():
    """
    Performs the setup and injections required before any external libraries are used.

    At present this consists of installing the log-masking filters on the external loggers.
    """
    add_filters_to_external_loggers()

src/snowflake/connector/logging_utils/__init__.py

Whitespace-only changes.
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
5+
from snowflake.connector.secret_detector import SecretDetector
6+
7+
8+
def add_filter_to_logger_and_children(
    base_logger_name: str, filter_instance: logging.Filter
) -> None:
    """Attach ``filter_instance`` to a logger and to all of its already-registered descendants.

    Args:
        base_logger_name: Dotted name of the logger at the top of the hierarchy.
        filter_instance: The filter object to attach. It is added to a logger only
            when it is not already present in that logger's ``filters`` list, so
            calling this function repeatedly with the same instance is harmless.

    Only loggers present in the logging manager's registry at call time are
    visited; nothing here attaches the filter to loggers created afterwards.
    """
    # getLogger creates the base logger if it does not exist yet.
    base_logger = logging.getLogger(base_logger_name)
    if filter_instance not in base_logger.filters:
        base_logger.addFilter(filter_instance)

    # The trailing dot keeps siblings such as "<base>other" from matching.
    child_prefix = base_logger_name + "."

    # Iterate over a snapshot: the registry is a live dict that grows whenever any
    # thread calls logging.getLogger() for a new name, and iterating it directly
    # could then fail with "dictionary changed size during iteration".
    registered_loggers = list(logging.root.manager.loggerDict.items())
    for name, obj in registered_loggers:
        if not name.startswith(child_prefix):
            continue

        if not isinstance(obj, logging.Logger):
            continue  # Skip placeholders

        if filter_instance not in obj.filters:
            obj.addFilter(filter_instance)
26+
27+
28+
class SecretMaskingFilter(logging.Filter):
    """
    A logging filter that masks sensitive information in log messages using the SecretDetector utility.

    This filter is designed for scenarios where you want to avoid applying SecretDetector globally
    as a formatter on all logging handlers. Global masking can introduce unnecessary computational
    overhead, particularly for internal logs where secrets are already handled explicitly.
    It would also be easy to bypass unintentionally, simply by adding a neighbouring handler to a logger
    without SecretDetector set as its formatter.

    On the other hand, libraries or submodules often do not have any handler attached, so formatting can't be
    configured at that level, while attaching a new handler there can cause unintended log output or its
    duplication.

    ⚠ Important:
        - Logging filters do **not** propagate down the logger hierarchy.
          To apply this filter across a hierarchy, use the `add_filter_to_logger_and_children` utility.
        - This filter causes **early formatting** of the log message (`record.getMessage()`),
          meaning `record.args` are merged into `record.msg` prematurely.
          If you rely on `record.args`, ensure this is the **last** filter in the chain.

    Notes:
        - The filter directly modifies `record.msg` with the masked version of the message.
        - It clears `record.args` to prevent re-formatting and ensure safe message output.

    Example:
        logger.addFilter(SecretMaskingFilter())
        handler.addFilter(SecretMaskingFilter())
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Mask secrets in ``record`` in place.

        Args:
            record: The log record to sanitize; its ``msg`` and ``args`` are overwritten.

        Returns:
            Always ``True`` - this filter never drops a record.
        """
        try:
            # Format the message as it would be
            message = record.getMessage()

            # Run masking on the whole message
            masked_data = SecretDetector.mask_secrets(message)
            record.msg = masked_data.masked_text
        except Exception as ex:
            # The fallback text is built from `record.asctime`, which may not exist yet
            # when a filter runs (it is set by a Formatter). Populate it first so that a
            # malformed log call (e.g. msg/args mismatch) cannot escalate into an
            # AttributeError raised out of the filter and into the caller's code.
            if not hasattr(record, "asctime"):
                record.asctime = logging.Formatter().formatTime(record)
            record.msg = SecretDetector.create_formatting_error_log(
                record, "EXCEPTION - " + str(ex)
            )
        finally:
            record.args = ()  # Prevent the args from being applied to the message again

        return True  # allow all logs through

src/snowflake/connector/secret_detector.py

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,18 @@
1010
import logging
1111
import os
1212
import re
13+
from typing import NamedTuple
1314

1415
MIN_TOKEN_LEN = os.getenv("MIN_TOKEN_LEN", 32)
1516
MIN_PWD_LEN = os.getenv("MIN_PWD_LEN", 8)
1617

1718

19+
class MaskedMessageData(NamedTuple):
    """Result returned by SecretDetector.mask_secrets.

    It is a tuple, so it can still be unpacked positionally as
    ``(is_masked, masked_text, error_str)``. With no arguments it yields
    ``(False, None, None)``, which is what mask_secrets returns for ``None`` input.
    """

    # The `masked` flag computed by mask_secrets.
    is_masked: bool = False
    # The text produced by mask_secrets; None when the input text was None.
    masked_text: str | None = None
    # str() of the exception caught while masking, or None when none was caught.
    error_str: str | None = None
23+
24+
1825
class SecretDetector(logging.Formatter):
1926
AWS_KEY_PATTERN = re.compile(
2027
r"(aws_key_id|aws_secret_key|access_key_id|secret_access_key)\s*=\s*'([^']+)'",
@@ -48,21 +55,31 @@ class SecretDetector(logging.Formatter):
4855
flags=re.IGNORECASE,
4956
)
5057

58+
SECRET_STARRED_MASK_STR = "****"
59+
5160
@staticmethod
5261
def mask_connection_token(text: str) -> str:
53-
return SecretDetector.CONNECTION_TOKEN_PATTERN.sub(r"\1\2****", text)
62+
return SecretDetector.CONNECTION_TOKEN_PATTERN.sub(
63+
r"\1\2" + f"{SecretDetector.SECRET_STARRED_MASK_STR}", text
64+
)
5465

5566
@staticmethod
5667
def mask_password(text: str) -> str:
57-
return SecretDetector.PASSWORD_PATTERN.sub(r"\1\2****", text)
68+
return SecretDetector.PASSWORD_PATTERN.sub(
69+
r"\1\2" + f"{SecretDetector.SECRET_STARRED_MASK_STR}", text
70+
)
5871

5972
@staticmethod
6073
def mask_aws_keys(text: str) -> str:
61-
return SecretDetector.AWS_KEY_PATTERN.sub(r"\1='****'", text)
74+
return SecretDetector.AWS_KEY_PATTERN.sub(
75+
r"\1=" + f"'{SecretDetector.SECRET_STARRED_MASK_STR}'", text
76+
)
6277

6378
@staticmethod
6479
def mask_sas_tokens(text: str) -> str:
65-
return SecretDetector.SAS_TOKEN_PATTERN.sub(r"\1=****", text)
80+
return SecretDetector.SAS_TOKEN_PATTERN.sub(
81+
r"\1=" + f"{SecretDetector.SECRET_STARRED_MASK_STR}", text
82+
)
6683

6784
@staticmethod
6885
def mask_aws_tokens(text: str) -> str:
@@ -81,17 +98,17 @@ def mask_private_key_data(text: str) -> str:
8198
)
8299

83100
@staticmethod
84-
def mask_secrets(text: str) -> tuple[bool, str, str | None]:
101+
def mask_secrets(text: str) -> MaskedMessageData:
85102
"""Masks any secrets. This is the method that should be used by outside classes.
86103
87104
Args:
88105
text: A string which may contain a secret.
89106
90107
Returns:
91-
The masked string.
108+
The masked string data in MaskedMessageData.
92109
"""
93110
if text is None:
94-
return (False, None, None)
111+
return MaskedMessageData()
95112

96113
masked = False
97114
err_str = None
@@ -119,7 +136,20 @@ def mask_secrets(text: str) -> tuple[bool, str, str | None]:
119136
masked_text = str(ex)
120137
err_str = str(ex)
121138

122-
return masked, masked_text, err_str
139+
return MaskedMessageData(masked, masked_text, err_str)
140+
141+
@staticmethod
def create_formatting_error_log(
    original_record: logging.LogRecord, error_message: str
) -> str:
    """Build the replacement log line emitted when sanitizing a record failed.

    Args:
        original_record: The record that could not be sanitized; its timestamp,
            thread name and level name are reused in the replacement line.
        error_message: Description of the failure, placed at the end of the line.

    Returns:
        A string of the form
        ``<time> - <thread> secret_detector.py - sanitize_log_str - <level> - <error>``.
    """
    # `asctime` is only present once a time-using Formatter has formatted the record.
    # When this is called earlier (from a logging filter, or after formatting itself
    # failed) the attribute may be missing, so derive the timestamp from the record
    # instead of raising AttributeError while reporting another error.
    timestamp = getattr(original_record, "asctime", None)
    if timestamp is None:
        timestamp = logging.Formatter().formatTime(original_record)

    return "{} - {} {} - {} - {} - {}".format(
        timestamp,
        original_record.threadName,
        "secret_detector.py",
        "sanitize_log_str",
        original_record.levelname,
        error_message,
    )
123153

124154
def format(self, record: logging.LogRecord) -> str:
125155
"""Wrapper around logging module's formatter.
@@ -134,25 +164,18 @@ def format(self, record: logging.LogRecord) -> str:
134164
"""
135165
try:
136166
unsanitized_log = super().format(record)
137-
masked, sanitized_log, err_str = SecretDetector.mask_secrets(
167+
masked, optional_sanitized_log, err_str = SecretDetector.mask_secrets(
138168
unsanitized_log
139169
)
170+
# Added to comply with type hints (Optional[str] is not accepted for str)
171+
sanitized_log = optional_sanitized_log or ""
172+
140173
if masked and err_str is not None:
141-
sanitized_log = "{} - {} {} - {} - {} - {}".format(
142-
record.asctime,
143-
record.threadName,
144-
"secret_detector.py",
145-
"sanitize_log_str",
146-
record.levelname,
147-
err_str,
148-
)
174+
sanitized_log = self.create_formatting_error_log(record, err_str)
175+
149176
except Exception as ex:
150-
sanitized_log = "{} - {} {} - {} - {} - {}".format(
151-
record.asctime,
152-
record.threadName,
153-
"secret_detector.py",
154-
"sanitize_log_str",
155-
record.levelname,
156-
"EXCEPTION - " + str(ex),
177+
sanitized_log = self.create_formatting_error_log(
178+
record, "EXCEPTION - " + str(ex)
157179
)
180+
158181
return sanitized_log

0 commit comments

Comments
 (0)