Skip to content

Commit 5528d1b

Browse files
authored
feat(FIR-9997): Usage tracking (#172)
1 parent 0779fdf commit 5528d1b

File tree

7 files changed

+483
-6
lines changed

7 files changed

+483
-6
lines changed

src/firebolt/async_db/connection.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import socket
55
from json import JSONDecodeError
66
from types import TracebackType
7-
from typing import Any, Callable, List, Optional, Type
7+
from typing import Any, Callable, Dict, List, Optional, Type
88

99
from httpcore.backends.auto import AutoBackend
1010
from httpcore.backends.base import AsyncNetworkStream
@@ -24,6 +24,7 @@
2424
ACCOUNT_ENGINE_URL,
2525
ACCOUNT_ENGINE_URL_BY_DATABASE_NAME,
2626
)
27+
from firebolt.utils.usage_tracker import get_user_agent_header
2728
from firebolt.utils.util import fix_url_schema
2829

2930
DEFAULT_TIMEOUT_SECONDS: int = 5
@@ -166,6 +167,7 @@ async def connect_inner(
166167
account_name: Optional[str] = None,
167168
api_endpoint: str = DEFAULT_API_URL,
168169
use_token_cache: bool = True,
170+
additional_parameters: Dict[str, Any] = {},
169171
) -> Connection:
170172
"""Connect to Firebolt database.
171173
@@ -183,6 +185,8 @@ async def connect_inner(
183185
api_endpoint (str): Firebolt API endpoint. Used for authentication.
184186
use_token_cache (bool): Cached authentication token in filesystem.
185187
Default: True
188+
additional_parameters (Optional[Dict]): Dictionary of less widely-used
189+
arguments for connection.
186190
187191
Note:
188192
Providing both `engine_name` and `engine_url` would result in an error.
@@ -238,7 +242,9 @@ async def connect_inner(
238242
assert engine_url is not None
239243

240244
engine_url = fix_url_schema(engine_url)
241-
return connection_class(engine_url, database, auth, api_endpoint)
245+
return connection_class(
246+
engine_url, database, auth, api_endpoint, additional_parameters
247+
)
242248

243249
return connect_inner
244250

@@ -297,17 +303,19 @@ def __init__(
297303
database: str,
298304
auth: Auth,
299305
api_endpoint: str = DEFAULT_API_URL,
306+
additional_parameters: Dict[str, Any] = {},
300307
):
301308
# Override tcp keepalive settings for connection
302309
transport = AsyncHTTPTransport()
303310
transport._pool._network_backend = OverriddenHttpBackend()
304-
311+
connector_versions = additional_parameters.get("connector_versions", [])
305312
self._client = AsyncClient(
306313
auth=auth,
307314
base_url=engine_url,
308315
api_endpoint=api_endpoint,
309316
timeout=Timeout(DEFAULT_TIMEOUT_SECONDS, read=None),
310317
transport=transport,
318+
headers={"User-Agent": get_user_agent_header(connector_versions)},
311319
)
312320
self.api_endpoint = api_endpoint
313321
self.engine_url = engine_url
@@ -372,6 +380,9 @@ class Connection(BaseConnection):
372380
username: Firebolt account username
373381
password: Firebolt account password
374382
api_endpoint: Optional. Firebolt API endpoint. Used for authentication.
383+
connector_versions: Optional. Tuple of connector name and version or
384+
list of tuples of your connector stack. Useful for tracking custom
385+
connector usage.
375386
376387
Note:
377388
Firebolt currenly doesn't support transactions
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import inspect
2+
import logging
3+
from importlib import import_module
4+
from pathlib import Path
5+
from platform import python_version, release, system
6+
from sys import modules
7+
from typing import Dict, List, Optional, Tuple
8+
9+
from pydantic import BaseModel
10+
11+
from firebolt import __version__
12+
13+
14+
class ConnectorVersions(BaseModel):
15+
"""
16+
Verify correct parameter types
17+
"""
18+
19+
versions: List[Tuple[str, str]]
20+
21+
22+
logger = logging.getLogger(__name__)
23+
24+
25+
CONNECTOR_MAP = [
26+
(
27+
"DBT",
28+
"open",
29+
Path("dbt/adapters/firebolt/connections.py"),
30+
"dbt.adapters.firebolt",
31+
),
32+
(
33+
"Airflow",
34+
"get_conn",
35+
Path("firebolt_provider/hooks/firebolt.py"),
36+
"firebolt_provider",
37+
),
38+
(
39+
"AirbyteDestination",
40+
"establish_connection",
41+
Path("destination_firebolt/destination.py"),
42+
"",
43+
),
44+
(
45+
"AirbyteDestination",
46+
"establish_async_connection",
47+
Path("destination_firebolt/destination.py"),
48+
"",
49+
),
50+
("AirbyteSource", "establish_connection", Path("source_firebolt/source.py"), ""),
51+
(
52+
"AirbyteSource",
53+
"establish_async_connection",
54+
Path("source_firebolt/source.py"),
55+
"",
56+
),
57+
("SQLAlchemy", "connect", Path("sqlalchemy/engine/default.py"), "firebolt_db"),
58+
("FireboltCLI", "create_connection", Path("firebolt_cli/utils.py"), "firebolt_cli"),
59+
]
60+
61+
62+
def _os_compare(file: Path, expected: Path) -> bool:
63+
"""
64+
System-independent path comparison.
65+
66+
Args:
67+
file: file path to check against
68+
expected: expected file path
69+
70+
Returns:
71+
True if file ends with path
72+
"""
73+
return file.parts[-len(expected.parts) :] == expected.parts
74+
75+
76+
def get_sdk_properties() -> Tuple[str, str, str, str]:
77+
"""
78+
Detect Python, OS and SDK versions.
79+
80+
Returns:
81+
Python version, SDK version, OS name and "ciso" if imported
82+
"""
83+
py_version = python_version()
84+
sdk_version = __version__
85+
os_version = f"{system()} {release()}"
86+
ciso = "ciso8601" if "ciso8601" in modules.keys() else ""
87+
logger.debug(
88+
"Python %s detected. SDK %s OS %s %s",
89+
py_version,
90+
sdk_version,
91+
os_version,
92+
ciso,
93+
)
94+
return (py_version, sdk_version, os_version, ciso)
95+
96+
97+
def detect_connectors() -> Dict[str, str]:
98+
"""
99+
Detect which connectors are running the code by parsing the stack.
100+
Exceptions are ignored since this is intended for logging only.
101+
"""
102+
connectors: Dict[str, str] = {}
103+
stack = inspect.stack()
104+
for f in stack:
105+
try:
106+
for name, func, path, version_path in CONNECTOR_MAP:
107+
if f.function == func and _os_compare(Path(f.filename), path):
108+
if version_path:
109+
m = import_module(version_path)
110+
connectors[name] = m.__version__ # type: ignore
111+
else:
112+
# Some connectors don't have versions specified
113+
connectors[name] = ""
114+
# No need to carry on if connector is detected
115+
break
116+
except Exception:
117+
logger.debug(
118+
"Failed to extract version from %s in %s", f.function, f.filename
119+
)
120+
return connectors
121+
122+
123+
def format_as_user_agent(connectors: Dict[str, str]) -> str:
124+
"""
125+
Return a representation of a stored tracking data as a user-agent header.
126+
127+
Args:
128+
connectors: Dictionary of connector to version mappings
129+
130+
Returns:
131+
String of the current detected connector stack.
132+
"""
133+
py, sdk, os, ciso = get_sdk_properties()
134+
sdk_format = f"PythonSDK/{sdk} (Python {py}; {os}; {ciso})"
135+
connector_format = "".join(
136+
[f" {connector}/{version}" for connector, version in connectors.items()]
137+
)
138+
return sdk_format + connector_format
139+
140+
141+
def get_user_agent_header(
142+
connector_versions: Optional[List[Tuple[str, str]]] = []
143+
) -> str:
144+
"""
145+
Return a user agent header with connector stack and system information.
146+
147+
Args:
148+
connector_versions(Optional): User-supplied list of tuples of all connectors
149+
and their versions intended for tracking.
150+
151+
Returns:
152+
String representation of a user-agent tracking information
153+
"""
154+
connectors = detect_connectors()
155+
logger.debug("Detected running from packages: %s", str(connectors))
156+
# Override auto-detected connectors with info provided manually
157+
for name, version in ConnectorVersions(versions=connector_versions).versions:
158+
connectors[name] = version
159+
return format_as_user_agent(connectors)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import sys
2+
3+
# Hack to avoid detecting current file as firebolt module
4+
old_path = sys.path
5+
sys.path = sys.path[1:]
6+
from firebolt.utils.usage_tracker import get_user_agent_header
7+
8+
# Back to old path for detection to work properly
9+
sys.path = old_path
10+
11+
12+
def {function_name}():
13+
print(get_user_agent_header())
14+
15+
{function_name}()
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import os
2+
from pathlib import Path
3+
from shutil import rmtree
4+
from subprocess import PIPE, run
5+
6+
from pytest import fixture, mark
7+
8+
TEST_FOLDER = "tmp_test_code/"
9+
TEST_SCRIPT_MODEL = "tests/integration/utils/sample_usage.model"
10+
11+
12+
MOCK_MODULES = [
13+
"firebolt_cli/firebolt_cli.py",
14+
"sqlalchemy/engine/firebolt_db.py",
15+
"firebolt_provider/hooks/firebolt_provider.py",
16+
"dbt/adapters/firebolt/dbt/adapters/firebolt.py",
17+
]
18+
19+
20+
@fixture(scope="module", autouse=True)
21+
def create_cli_mock():
22+
for i, file in enumerate(MOCK_MODULES):
23+
os.makedirs(os.path.dirname(f"{TEST_FOLDER}{file}"))
24+
with open(f"{TEST_FOLDER}{file}", "w") as f:
25+
f.write(f"__version__ = '1.0.{i}'")
26+
# Additional setup for proper dbt import
27+
Path(f"{TEST_FOLDER}dbt/adapters/firebolt/dbt/__init__.py").touch()
28+
Path(f"{TEST_FOLDER}/dbt/adapters/firebolt/dbt/adapters/__init__.py").touch()
29+
yield
30+
rmtree(TEST_FOLDER)
31+
32+
33+
@fixture(scope="module")
34+
def test_model():
35+
with open(TEST_SCRIPT_MODEL) as f:
36+
return f.read()
37+
38+
39+
def create_test_file(code: str, function_name: str, file_path: str):
40+
code = code.format(function_name=function_name)
41+
os.makedirs(os.path.dirname(file_path), exist_ok=True)
42+
with open(file_path, "w") as f:
43+
f.write(code)
44+
45+
46+
@mark.parametrize(
47+
"function,path,expected",
48+
[
49+
("create_connection", "firebolt_cli/utils.py", "FireboltCLI/1.0.0"),
50+
("connect", "sqlalchemy/engine/default.py", "SQLAlchemy/1.0.1"),
51+
("establish_connection", "source_firebolt/source.py", "AirbyteSource/"),
52+
("establish_async_connection", "source_firebolt/source.py", "AirbyteSource/"),
53+
(
54+
"establish_connection",
55+
"destination_firebolt/destination.py",
56+
"AirbyteDestination/",
57+
),
58+
(
59+
"establish_async_connection",
60+
"destination_firebolt/destination.py",
61+
"AirbyteDestination/",
62+
),
63+
("get_conn", "firebolt_provider/hooks/firebolt.py", "Airflow/1.0.2"),
64+
("open", "dbt/adapters/firebolt/connections.py", "DBT/1.0.3"),
65+
],
66+
)
67+
def test_usage_detection(function, path, expected, test_model):
68+
test_path = TEST_FOLDER + path
69+
create_test_file(test_model, function, test_path)
70+
result = run(
71+
["python3", test_path],
72+
stdout=PIPE,
73+
stderr=PIPE,
74+
env={"PYTHONPATH": os.getenv("PYTHONPATH", ""), "PATH": os.getenv("PATH", "")},
75+
)
76+
assert not result.stderr
77+
assert expected in result.stdout.decode("utf-8")

0 commit comments

Comments
 (0)