Skip to content

Commit adebadc

Browse files
sfc-gh-fpawlowski authored and sfc-gh-pczajka committed
SNOW-2011595 Masking filter introduced on library levels (#2253)
1 parent d42c572 commit adebadc

File tree

13 files changed

+234
-62
lines changed

13 files changed

+234
-62
lines changed

.github/workflows/build_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ on:
2121
description: "Test scenario tags"
2222

2323
concurrency:
24-
# older builds for the same pull request numer or branch should be cancelled
24+
# older builds for the same pull request number or branch should be cancelled
2525
cancel-in-progress: true
2626
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
2727

src/snowflake/connector/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
import logging
1717
from logging import NullHandler
1818

19+
from snowflake.connector.externals_utils.externals_setup import setup_external_libraries
20+
1921
from .connection import SnowflakeConnection
2022
from .cursor import DictCursor
2123
from .dbapi import (
@@ -48,6 +50,7 @@
4850
from .version import VERSION
4951

5052
logging.getLogger(__name__).addHandler(NullHandler())
53+
setup_external_libraries()
5154

5255

5356
@wraps(SnowflakeConnection.__init__)

src/snowflake/connector/azure_storage_client.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import os
1010
import xml.etree.ElementTree as ET
1111
from datetime import datetime, timezone
12-
from logging import Filter, getLogger
12+
from logging import getLogger
1313
from random import choice
1414
from string import hexdigits
1515
from typing import TYPE_CHECKING, Any, NamedTuple
@@ -41,22 +41,6 @@ class AzureLocation(NamedTuple):
4141
MATDESC = "x-ms-meta-matdesc"
4242

4343

44-
class AzureCredentialFilter(Filter):
45-
LEAKY_FMT = '%s://%s:%s "%s %s %s" %s %s'
46-
47-
def filter(self, record):
48-
if record.msg == AzureCredentialFilter.LEAKY_FMT and len(record.args) == 8:
49-
record.args = (
50-
record.args[:4] + (record.args[4].split("?")[0],) + record.args[5:]
51-
)
52-
return True
53-
54-
55-
getLogger("snowflake.connector.vendored.urllib3.connectionpool").addFilter(
56-
AzureCredentialFilter()
57-
)
58-
59-
6044
class SnowflakeAzureRestClient(SnowflakeStorageClient):
6145
def __init__(
6246
self,

src/snowflake/connector/cursor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -888,8 +888,8 @@ def execute(
888888
_exec_async: Whether to execute this query asynchronously.
889889
_no_retry: Whether or not to retry on known errors.
890890
_do_reset: Whether or not the result set needs to be reset before executing query.
891-
_put_callback: Function to which GET command should call back to.
892-
_put_azure_callback: Function to which an Azure GET command should call back to.
891+
_put_callback: Function to which PUT command should call back to.
892+
_put_azure_callback: Function to which an Azure PUT command should call back to.
893893
_put_callback_output_stream: The output stream a PUT command's callback should report on.
894894
_get_callback: Function to which GET command should call back to.
895895
_get_azure_callback: Function to which an Azure GET command should call back to.

src/snowflake/connector/externals_utils/__init__.py

Whitespace-only changes.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from __future__ import annotations
2+
3+
from snowflake.connector.logging_utils.filters import (
4+
SecretMaskingFilter,
5+
add_filter_to_logger_and_children,
6+
)
7+
8+
MODULES_TO_MASK_LOGS_NAMES = [
9+
"snowflake.connector.vendored.urllib3",
10+
"botocore",
11+
"boto3",
12+
]
13+
# TODO: after migration to the external urllib3 from the vendored one (SNOW-2041970),
14+
# we should change filters here immediately to the below module's logger:
15+
# MODULES_TO_MASK_LOGS_NAMES = [ "urllib3", ... ]
16+
17+
18+
def add_filters_to_external_loggers():
19+
for module_name in MODULES_TO_MASK_LOGS_NAMES:
20+
add_filter_to_logger_and_children(module_name, SecretMaskingFilter())
21+
22+
23+
def setup_external_libraries():
24+
"""
25+
Ensures proper setup and injections before any external libraries are used.
26+
"""
27+
add_filters_to_external_loggers()

src/snowflake/connector/logging_utils/__init__.py

Whitespace-only changes.
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
5+
from snowflake.connector.secret_detector import SecretDetector
6+
7+
8+
def add_filter_to_logger_and_children(
9+
base_logger_name: str, filter_instance: logging.Filter
10+
) -> None:
11+
# Ensure the base logger exists and apply filter
12+
base_logger = logging.getLogger(base_logger_name)
13+
if filter_instance not in base_logger.filters:
14+
base_logger.addFilter(filter_instance)
15+
16+
all_loggers_pairs = logging.root.manager.loggerDict.items()
17+
for name, obj in all_loggers_pairs:
18+
if not name.startswith(base_logger_name + "."):
19+
continue
20+
21+
if not isinstance(obj, logging.Logger):
22+
continue # Skip placeholders
23+
24+
if filter_instance not in obj.filters:
25+
obj.addFilter(filter_instance)
26+
27+
28+
class SecretMaskingFilter(logging.Filter):
29+
"""
30+
A logging filter that masks sensitive information in log messages using the SecretDetector utility.
31+
32+
This filter is designed for scenarios where you want to avoid applying SecretDetector globally
33+
as a formatter on all logging handlers. Global masking can introduce unnecessary computational
34+
overhead, particularly for internal logs where secrets are already handled explicitly.
35+
It would also be easy to bypass unintentionally by simply adding a neighbouring handler to a logger
36+
- without SecretDetector set as a formatter.
37+
38+
On the other hand, libraries or submodules often do not have any handler attached, so formatting can't be
39+
configured at that level, while attaching a new handler for that purpose can cause unintended or duplicated log output.
40+
41+
⚠ Important:
42+
- Logging filters do **not** propagate down the logger hierarchy.
43+
To apply this filter across a hierarchy, use the `add_filter_to_logger_and_children` utility.
44+
- This filter causes **early formatting** of the log message (`record.getMessage()`),
45+
meaning `record.args` are merged into `record.msg` prematurely.
46+
If you rely on `record.args`, ensure this is the **last** filter in the chain.
47+
48+
Notes:
49+
- The filter directly modifies `record.msg` with the masked version of the message.
50+
- It clears `record.args` to prevent re-formatting and ensure safe message output.
51+
52+
Example:
53+
logger.addFilter(SecretMaskingFilter())
54+
handler.addFilter(SecretMaskingFilter())
55+
"""
56+
57+
def filter(self, record: logging.LogRecord) -> bool:
58+
try:
59+
# Format the message as it would be
60+
message = record.getMessage()
61+
62+
# Run masking on the whole message
63+
masked_data = SecretDetector.mask_secrets(message)
64+
record.msg = masked_data.masked_text
65+
except Exception as ex:
66+
record.msg = SecretDetector.create_formatting_error_log(
67+
record, "EXCEPTION - " + str(ex)
68+
)
69+
finally:
70+
record.args = () # Clear args so the already-formatted message is not formatted again
71+
72+
return True # allow all logs through

src/snowflake/connector/secret_detector.py

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,18 @@
1414
import logging
1515
import os
1616
import re
17+
from typing import NamedTuple
1718

1819
MIN_TOKEN_LEN = os.getenv("MIN_TOKEN_LEN", 32)
1920
MIN_PWD_LEN = os.getenv("MIN_PWD_LEN", 8)
2021

2122

23+
class MaskedMessageData(NamedTuple):
24+
is_masked: bool = False
25+
masked_text: str | None = None
26+
error_str: str | None = None
27+
28+
2229
class SecretDetector(logging.Formatter):
2330
AWS_KEY_PATTERN = re.compile(
2431
r"(aws_key_id|aws_secret_key|access_key_id|secret_access_key)\s*=\s*'([^']+)'",
@@ -52,21 +59,31 @@ class SecretDetector(logging.Formatter):
5259
flags=re.IGNORECASE,
5360
)
5461

62+
SECRET_STARRED_MASK_STR = "****"
63+
5564
@staticmethod
5665
def mask_connection_token(text: str) -> str:
57-
return SecretDetector.CONNECTION_TOKEN_PATTERN.sub(r"\1\2****", text)
66+
return SecretDetector.CONNECTION_TOKEN_PATTERN.sub(
67+
r"\1\2" + f"{SecretDetector.SECRET_STARRED_MASK_STR}", text
68+
)
5869

5970
@staticmethod
6071
def mask_password(text: str) -> str:
61-
return SecretDetector.PASSWORD_PATTERN.sub(r"\1\2****", text)
72+
return SecretDetector.PASSWORD_PATTERN.sub(
73+
r"\1\2" + f"{SecretDetector.SECRET_STARRED_MASK_STR}", text
74+
)
6275

6376
@staticmethod
6477
def mask_aws_keys(text: str) -> str:
65-
return SecretDetector.AWS_KEY_PATTERN.sub(r"\1='****'", text)
78+
return SecretDetector.AWS_KEY_PATTERN.sub(
79+
r"\1=" + f"'{SecretDetector.SECRET_STARRED_MASK_STR}'", text
80+
)
6681

6782
@staticmethod
6883
def mask_sas_tokens(text: str) -> str:
69-
return SecretDetector.SAS_TOKEN_PATTERN.sub(r"\1=****", text)
84+
return SecretDetector.SAS_TOKEN_PATTERN.sub(
85+
r"\1=" + f"{SecretDetector.SECRET_STARRED_MASK_STR}", text
86+
)
7087

7188
@staticmethod
7289
def mask_aws_tokens(text: str) -> str:
@@ -85,17 +102,17 @@ def mask_private_key_data(text: str) -> str:
85102
)
86103

87104
@staticmethod
88-
def mask_secrets(text: str) -> tuple[bool, str, str | None]:
105+
def mask_secrets(text: str) -> MaskedMessageData:
89106
"""Masks any secrets. This is the method that should be used by outside classes.
90107
91108
Args:
92109
text: A string which may contain a secret.
93110
94111
Returns:
95-
The masked string.
112+
The masked string data in MaskedMessageData.
96113
"""
97114
if text is None:
98-
return (False, None, None)
115+
return MaskedMessageData()
99116

100117
masked = False
101118
err_str = None
@@ -123,7 +140,20 @@ def mask_secrets(text: str) -> tuple[bool, str, str | None]:
123140
masked_text = str(ex)
124141
err_str = str(ex)
125142

126-
return masked, masked_text, err_str
143+
return MaskedMessageData(masked, masked_text, err_str)
144+
145+
@staticmethod
146+
def create_formatting_error_log(
147+
original_record: logging.LogRecord, error_message: str
148+
) -> str:
149+
return "{} - {} {} - {} - {} - {}".format(
150+
original_record.asctime,
151+
original_record.threadName,
152+
"secret_detector.py",
153+
"sanitize_log_str",
154+
original_record.levelname,
155+
error_message,
156+
)
127157

128158
def format(self, record: logging.LogRecord) -> str:
129159
"""Wrapper around logging module's formatter.
@@ -138,25 +168,18 @@ def format(self, record: logging.LogRecord) -> str:
138168
"""
139169
try:
140170
unsanitized_log = super().format(record)
141-
masked, sanitized_log, err_str = SecretDetector.mask_secrets(
171+
masked, optional_sanitized_log, err_str = SecretDetector.mask_secrets(
142172
unsanitized_log
143173
)
174+
# Added to comply with type hints (Optional[str] is not accepted for str)
175+
sanitized_log = optional_sanitized_log or ""
176+
144177
if masked and err_str is not None:
145-
sanitized_log = "{} - {} {} - {} - {} - {}".format(
146-
record.asctime,
147-
record.threadName,
148-
"secret_detector.py",
149-
"sanitize_log_str",
150-
record.levelname,
151-
err_str,
152-
)
178+
sanitized_log = self.create_formatting_error_log(record, err_str)
179+
153180
except Exception as ex:
154-
sanitized_log = "{} - {} {} - {} - {} - {}".format(
155-
record.asctime,
156-
record.threadName,
157-
"secret_detector.py",
158-
"sanitize_log_str",
159-
record.levelname,
160-
"EXCEPTION - " + str(ex),
181+
sanitized_log = self.create_formatting_error_log(
182+
record, "EXCEPTION - " + str(ex)
161183
)
184+
162185
return sanitized_log

test/integ/conftest.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,22 @@ def init_test_schema(db_parameters) -> Generator[None]:
173173
174174
This is automatically called per test session.
175175
"""
176-
ret = db_parameters
177-
with snowflake.connector.connect(
178-
user=ret["user"],
179-
password=ret["password"],
180-
host=ret["host"],
181-
port=ret["port"],
182-
database=ret["database"],
183-
account=ret["account"],
184-
protocol=ret["protocol"],
185-
) as con:
176+
connection_params = {
177+
"user": db_parameters["user"],
178+
"password": db_parameters["password"],
179+
"host": db_parameters["host"],
180+
"port": db_parameters["port"],
181+
"database": db_parameters["database"],
182+
"account": db_parameters["account"],
183+
"protocol": db_parameters["protocol"],
184+
}
185+
186+
# Role may be needed when running on preprod, but is not present on Jenkins jobs
187+
optional_role = db_parameters.get("role")
188+
if optional_role is not None:
189+
connection_params.update(role=optional_role)
190+
191+
with snowflake.connector.connect(**connection_params) as con:
186192
con.cursor().execute(f"CREATE SCHEMA IF NOT EXISTS {TEST_SCHEMA}")
187193
yield
188194
con.cursor().execute(f"DROP SCHEMA IF EXISTS {TEST_SCHEMA}")

0 commit comments

Comments
 (0)