Skip to content

Commit 4ccba6a

Browse files
authored
SNOW-644849: Add telemetry about imported packages at runtime (#1236)
1 parent 2c3f6d5 commit 4ccba6a

File tree

3 files changed

+118
-1
lines changed

3 files changed

+118
-1
lines changed

src/snowflake/connector/connection.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,12 @@
8383
SnowflakeRestful,
8484
)
8585
from .sqlstate import SQLSTATE_CONNECTION_NOT_EXISTS, SQLSTATE_FEATURE_NOT_SUPPORTED
86-
from .telemetry import TelemetryClient
86+
from .telemetry import (
87+
TelemetryClient,
88+
TelemetryData,
89+
TelemetryField,
90+
generate_telemetry_data,
91+
)
8792
from .telemetry_oob import TelemetryService
8893
from .time_util import HeartBeatTimer, get_time_millis
8994
from .util_text import construct_hostname, parse_account, split_statements
@@ -192,6 +197,10 @@ def DefaultConverterClass():
192197
None,
193198
(type(None), str),
194199
), # Path to connection diag whitelist json
200+
"log_imported_packages_in_telemetry": (
201+
True,
202+
bool,
203+
), # Whether to log imported packages in telemetry
195204
}
196205

197206
APPLICATION_RE = re.compile(r"[\w\d_]+")
@@ -292,6 +301,9 @@ def __init__(self, **kwargs):
292301
self.connect(**kwargs)
293302
self._telemetry = TelemetryClient(self._rest)
294303

304+
# get the imported modules from sys.modules
305+
self._log_telemetry_imported_packages()
306+
295307
def __del__(self): # pragma: no cover
296308
try:
297309
self.close(retry=False)
@@ -1541,3 +1553,28 @@ def _all_async_queries_finished(self) -> bool:
15411553
not self.is_still_running(self.get_query_status(q)) for q in queries
15421554
)
15431555
return all(finished_async_queries)
1556+
1557+
def _log_telemetry_imported_packages(self) -> None:
    """Log the top-level packages currently imported in this process.

    Does nothing when ``self._log_imported_packages_in_telemetry`` is falsy.
    Otherwise one ``TelemetryData`` record is handed to
    ``self._log_telemetry`` with:

    * type   -- ``TelemetryField.IMPORTED_PACKAGES``
    * source -- ``self.application`` if it is truthy, else ``CLIENT_NAME``
    * value  -- ``str()`` of the set of top-level package names
    """
    if not self._log_imported_packages_in_telemetry:
        return

    # Iterate over a snapshot of the keys: another thread importing a module
    # while we walk ``sys.modules`` would otherwise raise
    # "RuntimeError: dictionary changed size during iteration".
    module_names = tuple(sys.modules)

    # filter out duplicates caused by submodules
    # and internal modules with names starting with an underscore
    imported_modules = {
        k.split(".", maxsplit=1)[0] for k in module_names if not k.startswith("_")
    }
    ts = get_time_millis()
    self._log_telemetry(
        TelemetryData(
            generate_telemetry_data(
                from_dict={
                    TelemetryField.KEY_TYPE.value: TelemetryField.IMPORTED_PACKAGES.value,
                    TelemetryField.KEY_SOURCE.value: self.application
                    if self.application
                    else CLIENT_NAME,
                    TelemetryField.KEY_VALUE.value: str(imported_modules),
                }
            ),
            ts,
        )
    )

src/snowflake/connector/telemetry.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class TelemetryField(Enum):
3636
# fetch_arrow_* usage
3737
ARROW_FETCH_ALL = "client_fetch_arrow_all"
3838
ARROW_FETCH_BATCHES = "client_fetch_arrow_batches"
39+
# imported packages along with client
40+
IMPORTED_PACKAGES = "client_imported_packages"
3941
# Keys for telemetry data sent through either in-band or out-of-band telemetry
4042
KEY_TYPE = "type"
4143
KEY_SOURCE = "source"

test/integ/test_connection.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from snowflake.connector.errors import Error, ForbiddenError
3636
from snowflake.connector.network import APPLICATION_SNOWSQL, ReauthenticationRequest
3737
from snowflake.connector.sqlstate import SQLSTATE_FEATURE_NOT_SUPPORTED
38+
from snowflake.connector.telemetry import TelemetryField
3839

3940
try: # pragma: no cover
4041
from parameters import CONNECTION_PARAMETERS_ADMIN
@@ -1107,3 +1108,80 @@ def test_ocsp_cache_working(conn_cnx):
11071108
with conn_cnx() as cnx:
11081109
assert cnx
11091110
assert OCSP_CACHE.telemetry["hit"] + OCSP_CACHE.telemetry["miss"] > original_count
1111+
1112+
1113+
@pytest.mark.skipolddriver
def test_imported_packages_telemetry(conn_cnx, capture_sf_telemetry, db_parameters):
    """Check the imported-packages telemetry record in three scenarios.

    1. Default connection: a record of type IMPORTED_PACKAGES with source
       CLIENT_NAME is captured and its value mentions the expected packages.
    2. Custom ``application``: the record's source is the application name.
    3. ``log_imported_packages_in_telemetry=False``: nothing is captured.
    """
    # these imports are not used but for testing
    import html.parser  # noqa: F401
    import json  # noqa: F401
    import multiprocessing as mp  # noqa: F401
    from datetime import date  # noqa: F401
    from math import sqrt  # noqa: F401

    def check_packages(message: str, expected_packages: list[str]) -> bool:
        # "__main__" starts with an underscore and must have been filtered out.
        if "__main__" in message:
            return False
        return all(package in message for package in expected_packages)

    type_key = TelemetryField.KEY_TYPE.value
    source_key = TelemetryField.KEY_SOURCE.value
    imported_packages_type = TelemetryField.IMPORTED_PACKAGES.value

    packages = [
        "pytest",
        "unittest",
        "json",
        "multiprocessing",
        "html",
        "datetime",
        "math",
    ]

    with conn_cnx() as conn, capture_sf_telemetry.patch_connection(
        conn, False
    ) as telemetry_test:
        conn._log_telemetry_imported_packages()
        assert len(telemetry_test.records) > 0
        # Every record is inspected (no short-circuit across records).
        default_source_hits = [
            t
            for t in telemetry_test.records
            if t.message[type_key] == imported_packages_type
            and CLIENT_NAME == t.message[source_key]
            and check_packages(t.message["value"], packages)
        ]
        assert default_source_hits

    # test different application
    new_application_name = "PythonSnowpark"
    config = {
        name: db_parameters[name]
        for name in (
            "user",
            "password",
            "host",
            "port",
            "account",
            "schema",
            "database",
            "protocol",
        )
    }
    config["timezone"] = "UTC"
    config["application"] = new_application_name

    with snowflake.connector.connect(
        **config
    ) as conn, capture_sf_telemetry.patch_connection(conn, False) as telemetry_test:
        conn._log_telemetry_imported_packages()
        assert len(telemetry_test.records) > 0
        custom_source_hits = [
            t
            for t in telemetry_test.records
            if t.message[type_key] == imported_packages_type
            and new_application_name == t.message[source_key]
        ]
        assert custom_source_hits

    # test opt out
    config["log_imported_packages_in_telemetry"] = False
    with snowflake.connector.connect(
        **config
    ) as conn, capture_sf_telemetry.patch_connection(conn, False) as telemetry_test:
        conn._log_telemetry_imported_packages()
        assert len(telemetry_test.records) == 0

0 commit comments

Comments
 (0)