Skip to content

Commit b48434d

Browse files
Review fixes - add secret filter to external libraries + tests
1 parent 08791bb commit b48434d

File tree

4 files changed

+98
-13
lines changed

4 files changed

+98
-13
lines changed

src/snowflake/connector/externals_utils/externals_setup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
"snowflake.connector.vendored.urllib3",
1010
"botocore",
1111
"boto3",
12+
"aiohttp", # this should not break even if [aio] extra is not installed - in such case logger will remain unused
13+
"aiobotocore",
14+
"aioboto3",
1215
]
1316
# TODO: after migration to the external urllib3 from the vendored one (SNOW-2041970),
1417
# we should change filters here immediately to the below module's logger:

test/integ/aio/test_large_result_set_async.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55

66
from __future__ import annotations
77

8-
from unittest.mock import Mock
8+
import logging
99

1010
import pytest
1111

12+
from snowflake.connector.secret_detector import SecretDetector
1213
from snowflake.connector.telemetry import TelemetryField
1314

1415
NUMBER_OF_ROWS = 50000
@@ -18,7 +19,9 @@
1819

1920
@pytest.fixture()
2021
async def ingest_data(request, conn_cnx, db_parameters):
21-
async with conn_cnx() as cnx:
22+
async with conn_cnx(
23+
session_parameters={"python_connector_query_result_format": "json"},
24+
) as cnx:
2225
await cnx.cursor().execute(
2326
"""
2427
create or replace table {name} (
@@ -89,7 +92,12 @@ async def test_query_large_result_set_n_threads(
8992
conn_cnx, db_parameters, ingest_data, num_threads
9093
):
9194
sql = "select * from {name} order by 1".format(name=db_parameters["name"])
92-
async with conn_cnx(client_prefetch_threads=num_threads) as cnx:
95+
async with conn_cnx(
96+
client_prefetch_threads=num_threads,
97+
session_parameters={
98+
"python_connector_query_result_format": "json",
99+
},
100+
) as cnx:
93101
assert cnx.client_prefetch_threads == num_threads
94102
results = []
95103
async for rec in await cnx.cursor().execute(sql):
@@ -102,13 +110,26 @@ async def test_query_large_result_set_n_threads(
102110

103111
@pytest.mark.aws
104112
@pytest.mark.skipolddriver
105-
async def test_query_large_result_set(conn_cnx, db_parameters, ingest_data):
113+
async def test_query_large_result_set(conn_cnx, db_parameters, ingest_data, caplog):
106114
"""[s3] Gets Large Result set."""
115+
caplog.set_level(logging.DEBUG)
116+
caplog.set_level(logging.DEBUG, logger="snowflake.connector.vendored.urllib3")
117+
caplog.set_level(
118+
logging.DEBUG, logger="snowflake.connector.vendored.urllib3.connectionpool"
119+
)
120+
caplog.set_level(logging.DEBUG, logger="aiohttp")
121+
caplog.set_level(logging.DEBUG, logger="aiohttp.client")
107122
sql = "select * from {name} order by 1".format(name=db_parameters["name"])
108-
async with conn_cnx() as cnx:
123+
async with conn_cnx(
124+
session_parameters={
125+
"python_connector_query_result_format": "json",
126+
}
127+
) as cnx:
109128
telemetry_data = []
110-
add_log_mock = Mock()
111-
add_log_mock.side_effect = lambda datum: telemetry_data.append(datum)
129+
130+
async def add_log_mock(datum):
131+
telemetry_data.append(datum)
132+
112133
cnx._telemetry.add_log_to_batch = add_log_mock
113134

114135
result2 = []
@@ -152,3 +173,20 @@ async def test_query_large_result_set(conn_cnx, db_parameters, ingest_data):
152173
"Expected three telemetry logs (one per query) "
153174
"for log type {}".format(field.value)
154175
)
176+
177+
aws_request_present = False
178+
expected_token_prefix = "X-Amz-Signature="
179+
for line in caplog.text.splitlines():
180+
if expected_token_prefix in line:
181+
aws_request_present = True
182+
# getattr is used to stay compatible with old driver - before SECRET_STARRED_MASK_STR was added
183+
assert (
184+
expected_token_prefix
185+
+ getattr(SecretDetector, "SECRET_STARRED_MASK_STR", "****")
186+
in line
187+
), "connectionpool logger is leaking sensitive information"
188+
189+
# If no AWS request appeared in logs, we cannot assert masking here.
190+
assert (
191+
aws_request_present
192+
), "AWS URL was not found in logs, so it can't be assumed that no leaks happened in it"

test/integ/aio/test_put_get_with_aws_token_async.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@
77

88
import glob
99
import gzip
10+
import logging
1011
import os
1112

1213
import pytest
1314
from aiohttp import ClientResponseError
1415

1516
from snowflake.connector.constants import UTF8
17+
from snowflake.connector.file_transfer_agent import SnowflakeS3ProgressPercentage
18+
from snowflake.connector.secret_detector import SecretDetector
1619

1720
try: # pragma: no cover
1821
from snowflake.connector.aio._file_transfer_agent import SnowflakeFileMeta
@@ -38,9 +41,10 @@
3841
@pytest.mark.parametrize(
3942
"from_path", [True, pytest.param(False, marks=pytest.mark.skipolddriver)]
4043
)
41-
async def test_put_get_with_aws(tmpdir, aio_connection, from_path):
44+
async def test_put_get_with_aws(tmpdir, aio_connection, from_path, caplog):
4245
"""[s3] Puts and Gets a small text using AWS S3."""
4346
# create a data file
47+
caplog.set_level(logging.DEBUG)
4448
fname = str(tmpdir.join("test_put_get_with_aws_token.txt.gz"))
4549
original_contents = "123,test1\n456,test2\n"
4650
with gzip.open(fname, "wb") as f:
@@ -60,6 +64,8 @@ async def test_put_get_with_aws(tmpdir, aio_connection, from_path):
6064
f"%{table_name}",
6165
from_path,
6266
sql_options=" auto_compress=true parallel=30",
67+
_put_callback=SnowflakeS3ProgressPercentage,
68+
_get_callback=SnowflakeS3ProgressPercentage,
6369
file_stream=file_stream,
6470
)
6571
rec = await csr.fetchone()
@@ -71,22 +77,44 @@ async def test_put_get_with_aws(tmpdir, aio_connection, from_path):
7177
f"copy into @%{table_name} from {table_name} "
7278
"file_format=(type=csv compression='gzip')"
7379
)
74-
await csr.execute(f"get @%{table_name} file://{tmp_dir}")
80+
await csr.execute(
81+
f"get @%{table_name} file://{tmp_dir}",
82+
_put_callback=SnowflakeS3ProgressPercentage,
83+
_get_callback=SnowflakeS3ProgressPercentage,
84+
)
7585
rec = await csr.fetchone()
7686
assert rec[0].startswith("data_"), "A file downloaded by GET"
7787
assert rec[1] == 36, "Return right file size"
7888
assert rec[2] == "DOWNLOADED", "Return DOWNLOADED status"
7989
assert rec[3] == "", "Return no error message"
8090
finally:
81-
await csr.execute(f"drop table {table_name}")
91+
await csr.execute(f"drop table if exists {table_name}")
8292
if file_stream:
8393
file_stream.close()
94+
await aio_connection.close()
8495

8596
files = glob.glob(os.path.join(tmp_dir, "data_*"))
8697
with gzip.open(files[0], "rb") as fd:
8798
contents = fd.read().decode(UTF8)
8899
assert original_contents == contents, "Output is different from the original file"
89100

101+
aws_request_present = False
102+
expected_token_prefix = "X-Amz-Signature="
103+
for line in caplog.text.splitlines():
104+
if ".amazonaws." in line:
105+
aws_request_present = True
106+
# getattr is used to stay compatible with old driver - before SECRET_STARRED_MASK_STR was added
107+
assert (
108+
expected_token_prefix
109+
+ getattr(SecretDetector, "SECRET_STARRED_MASK_STR", "****")
110+
in line
111+
or expected_token_prefix not in line
112+
), "connectionpool logger is leaking sensitive information"
113+
114+
assert (
115+
aws_request_present
116+
), "AWS URL was not found in logs, so it can't be assumed that no leaks happened in it"
117+
90118

91119
@pytest.mark.skipolddriver
92120
async def test_put_with_invalid_token(tmpdir, aio_connection):
@@ -141,3 +169,4 @@ async def test_put_with_invalid_token(tmpdir, aio_connection):
141169
await client.upload_chunk(0)
142170
finally:
143171
await csr.execute(f"drop table if exists {table_name}")
172+
await aio_connection.close()

test/integ/aio/test_put_get_with_azure_token_async.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
SnowflakeAzureProgressPercentage,
2121
SnowflakeProgressPercentage,
2222
)
23+
from snowflake.connector.secret_detector import SecretDetector
2324

2425
try:
2526
from snowflake.connector.util_text import random_string
@@ -86,13 +87,24 @@ async def test_put_get_with_azure(tmpdir, aio_connection, from_path, caplog):
8687
finally:
8788
if file_stream:
8889
file_stream.close()
89-
await csr.execute(f"drop table {table_name}")
90+
await csr.execute(f"drop table if exists {table_name}")
91+
await aio_connection.close()
9092

93+
azure_request_present = False
94+
expected_token_prefix = "sig="
9195
for line in caplog.text.splitlines():
92-
if "blob.core.windows.net" in line:
96+
if "blob.core.windows.net" in line and expected_token_prefix in line:
97+
azure_request_present = True
98+
# getattr is used to stay compatible with old driver - before SECRET_STARRED_MASK_STR was added
9399
assert (
94-
"sig=" not in line
100+
expected_token_prefix
101+
+ getattr(SecretDetector, "SECRET_STARRED_MASK_STR", "****")
102+
in line
95103
), "connectionpool logger is leaking sensitive information"
104+
105+
assert (
106+
azure_request_present
107+
), "Azure URL was not found in logs, so it can't be assumed that no leaks happened in it"
96108
files = glob.glob(os.path.join(tmp_dir, "data_*"))
97109
with gzip.open(files[0], "rb") as fd:
98110
contents = fd.read().decode(UTF8)
@@ -141,6 +153,7 @@ async def run(csr, sql):
141153
assert rows == number_of_files * number_of_lines, "Number of rows"
142154
finally:
143155
await run(csr, "drop table if exists {name}")
156+
await aio_connection.close()
144157

145158

146159
async def test_put_copy_duplicated_files_azure(tmpdir, aio_connection):
@@ -216,6 +229,7 @@ async def run(csr, sql):
216229
assert rows == number_of_files * number_of_lines, "Number of rows"
217230
finally:
218231
await run(csr, "drop table if exists {name}")
232+
await aio_connection.close()
219233

220234

221235
async def test_put_get_large_files_azure(tmpdir, aio_connection):
@@ -280,3 +294,4 @@ async def run(cnx, sql):
280294
assert all([rec[2] == "DOWNLOADED" for rec in all_recs])
281295
finally:
282296
await run(aio_connection, "RM @~/{dir}")
297+
await aio_connection.close()

0 commit comments

Comments
 (0)