Skip to content

Commit c5fea25

Browse files
authored
Feat: Add performance stats and logging (#360)
1 parent ddbf6f4 commit c5fea25

File tree

12 files changed

+349
-157
lines changed

12 files changed

+349
-157
lines changed

airbyte/_connector_base.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
)
2323

2424
from airbyte import exceptions as exc
25+
from airbyte._util.connector_info import ConnectorRuntimeInfo
26+
from airbyte._util.hashing import one_way_hash
2527
from airbyte._util.telemetry import (
2628
EventState,
2729
log_config_validation_result,
@@ -76,6 +78,15 @@ def name(self) -> str:
7678
"""Get the name of the connector."""
7779
return self._name
7880

81+
def _get_connector_runtime_info(self) -> ConnectorRuntimeInfo:
    """Build the runtime metadata record for this connector.

    The record carries the connector name, its detected version, the class
    name of its executor, and the hash of its current config. It is used for
    telemetry and performance logging.
    """
    # Dict literals evaluate left to right, so the attributes are read in the
    # same order as they are listed here.
    info_fields = {
        "name": self.name,
        "version": self.connector_version,
        "executor_type": type(self.executor).__name__,
        "config_hash": self.config_hash,
    }
    return ConnectorRuntimeInfo(**info_fields)
89+
7990
def _print_info_message(
8091
self,
8192
message: str,
@@ -124,6 +135,22 @@ def _config(self) -> dict[str, Any]:
124135
)
125136
return self._config_dict
126137

138+
@property
def config_hash(self) -> str | None:
    """Return a one-way hash of the current config, if one can be computed.

    Returns None when no config has been set, and also when hashing fails
    for any reason.
    """
    current_config = self._config_dict
    if current_config is None:
        return None

    try:
        digest = one_way_hash(current_config)
    except Exception:
        # Best-effort only: unhashable values or unexpected data types in
        # the config must not raise here, so report "no hash" instead.
        return None

    return digest
153+
127154
def validate_config(self, config: dict[str, Any] | None = None) -> None:
128155
"""Validate the config against the spec.
129156
@@ -262,7 +289,11 @@ def connector_version(self) -> str | None:
262289
263290
Returns None if the version cannot be determined.
264291
"""
265-
return self.executor.get_installed_version()
292+
try:
293+
return self.executor.get_installed_version()
294+
except Exception:
295+
# Version not detected, so return None.
296+
return None
266297

267298
def check(self) -> None:
268299
"""Call check on the connector.

airbyte/_executors/declarative.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ def __init__(
6464

6565
self.reported_version: str | None = self._manifest_dict.get("version", None)
6666

67+
def get_installed_version(
    self,
    *,
    raise_on_error: bool = False,
    recheck: bool = False,
) -> str | None:
    """Return the version reported for this connector.

    The value is `self.reported_version`; nothing is probed or re-detected.
    Both keyword arguments are accepted but ignored.
    """
    del raise_on_error, recheck  # Accepted but intentionally unused.
    return self.reported_version
76+
6777
def _validate_manifest(self, manifest_dict: dict) -> None:
6878
"""Validate the manifest."""
6979
manifest_text = yaml.safe_dump(manifest_dict)

airbyte/_message_iterators.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,22 @@
88
from collections.abc import Iterator
99
from typing import IO, TYPE_CHECKING, cast
1010

11+
import pendulum
1112
import pydantic
1213
from typing_extensions import final
1314

1415
from airbyte_protocol.models import (
1516
AirbyteMessage,
1617
AirbyteRecordMessage,
18+
AirbyteStreamStatus,
19+
AirbyteStreamStatusTraceMessage,
20+
AirbyteTraceMessage,
21+
StreamDescriptor,
22+
TraceType,
1723
Type,
1824
)
1925

2026
from airbyte.constants import AB_EXTRACTED_AT_COLUMN
21-
from airbyte.progress import _new_stream_success_message
2227

2328

2429
if TYPE_CHECKING:
@@ -28,6 +33,24 @@
2833
from airbyte.results import ReadResult
2934

3035

36+
def _new_stream_success_message(stream_name: str) -> AirbyteMessage:
    """Create a TRACE message marking the named stream as COMPLETE.

    The result is an `AirbyteMessage` of type TRACE wrapping a STREAM_STATUS
    trace whose stream descriptor carries `stream_name`, stamped with the
    current time.
    """
    complete_status = AirbyteStreamStatusTraceMessage(
        stream_descriptor=StreamDescriptor(
            name=stream_name,
        ),
        status=AirbyteStreamStatus.COMPLETE,
    )
    status_trace = AirbyteTraceMessage(
        type=TraceType.STREAM_STATUS,
        stream=stream_name,
        emitted_at=pendulum.now().float_timestamp,
        stream_status=complete_status,
    )
    return AirbyteMessage(type=Type.TRACE, trace=status_trace)
52+
53+
3154
class AirbyteMessageIterator:
3255
"""Abstract base class for Airbyte message iterables.
3356

airbyte/_util/connector_info.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
"""Connector info classes for PyAirbyte.
3+
4+
Used for telemetry and logging.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from dataclasses import asdict, dataclass
10+
from typing import Any
11+
12+
13+
@dataclass
class RuntimeInfoBase:
    """Base class for runtime info records, providing dict serialization."""

    def to_dict(self) -> dict[str, Any]:
        """Return the dataclass fields as a dict, omitting fields set to None."""
        populated: dict[str, Any] = {}
        for field_name, field_value in asdict(self).items():
            if field_value is not None:
                populated[field_name] = field_value
        return populated
17+
18+
19+
@dataclass
class WriterRuntimeInfo(RuntimeInfoBase):
    """Runtime info record describing a writer, for telemetry and logging."""

    # Writer type identifier (required).
    type: str
    # Hash of the writer's config; left as None when not available.
    config_hash: str | None = None
23+
24+
25+
@dataclass(kw_only=True)
class ConnectorRuntimeInfo(RuntimeInfoBase):
    """Runtime info record describing a connector, for telemetry and logging.

    All fields are keyword-only. Only `name` is required.
    """

    # Connector name (required).
    name: str
    # Class name of the executor running the connector, when known.
    executor_type: str | None = None
    # Connector version, when it could be determined.
    version: str | None = None
    # Hash of the connector's config, when available.
    config_hash: str | None = None

airbyte/_util/hashing.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
"""Hashing utils for PyAirbyte."""
3+
4+
from __future__ import annotations
5+
6+
import hashlib
7+
from collections.abc import Mapping
8+
9+
10+
HASH_SEED = "PyAirbyte:"
"""Additional seed for randomizing one-way hashed strings."""


def one_way_hash(
    obj: Mapping | list | object,
    /,
) -> str:
    """Return a one-way SHA-256 hex digest of the given object.

    Mappings and lists are hashed recursively; any other object is hashed via
    its `str()` representation. Mapping items are sorted before hashing, so
    the result does not depend on key insertion order.

    To ensure a unique domain of hashes, `HASH_SEED` is prepended to the
    string before hashing.

    Args:
        obj: The mapping, list, or other object to hash.

    Returns:
        The hexadecimal SHA-256 digest as a string.
    """
    string_to_hash: str
    if isinstance(obj, Mapping):
        # Hash each value recursively, then order the (key, hash) pairs so
        # the digest does not depend on insertion order.
        hashed_items = [(key, one_way_hash(value)) for key, value in obj.items()]
        try:
            ordered_items = sorted(hashed_items)
        except TypeError:
            # Keys of mutually unorderable types (e.g. int and str) cannot be
            # compared directly. Fall back to a deterministic ordering by type
            # name and string form. Inputs that sort natively are unaffected,
            # so their digests stay the same.
            ordered_items = sorted(
                hashed_items,
                key=lambda item: (type(item[0]).__name__, str(item[0])),
            )
        string_to_hash = str(ordered_items)

    elif isinstance(obj, list):
        # Recursively hash elements of the list, preserving their order.
        string_to_hash = str([one_way_hash(item) for item in obj])

    else:
        # Any other object is hashed via its string representation.
        string_to_hash = str(obj)

    return hashlib.sha256((HASH_SEED + string_to_hash).encode()).hexdigest()

airbyte/_util/telemetry.py

Lines changed: 21 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -32,39 +32,31 @@
3232
from __future__ import annotations
3333

3434
import datetime
35-
import hashlib
3635
import os
3736
from contextlib import suppress
38-
from dataclasses import asdict, dataclass
3937
from enum import Enum
4038
from functools import lru_cache
4139
from pathlib import Path
42-
from typing import TYPE_CHECKING, Any, cast
40+
from typing import Any, cast
4341

4442
import requests
4543
import ulid
4644
import yaml
4745

4846
from airbyte import exceptions as exc
4947
from airbyte._util import meta
48+
from airbyte._util.connector_info import (
49+
ConnectorRuntimeInfo,
50+
WriterRuntimeInfo,
51+
)
52+
from airbyte._util.hashing import one_way_hash
5053
from airbyte.version import get_version
5154

5255

53-
if TYPE_CHECKING:
54-
from airbyte._writers.base import AirbyteWriterInterface
55-
from airbyte.caches.base import CacheBase
56-
from airbyte.destinations.base import Destination
57-
from airbyte.sources.base import Source
58-
59-
6056
DEBUG = True
6157
"""Enable debug mode for telemetry code."""
6258

6359

64-
HASH_SEED = "PyAirbyte:"
65-
"""Additional seed for randomizing one-way hashed strings."""
66-
67-
6860
PYAIRBYTE_APP_TRACKING_KEY = (
6961
os.environ.get("AIRBYTE_TRACKING_KEY", "") or "cukeSffc0G6gFQehKDhhzSurDzVSZ2OP"
7062
)
@@ -185,83 +177,6 @@ class EventType(str, Enum):
185177
CHECK = "check"
186178

187179

188-
@dataclass
189-
class CacheTelemetryInfo:
190-
type: str
191-
192-
@classmethod
193-
def from_cache(cls, cache: CacheBase | None) -> CacheTelemetryInfo:
194-
if not cache:
195-
return cls(type="streaming")
196-
197-
return cls(type=type(cache).__name__)
198-
199-
200-
@dataclass
201-
class SourceTelemetryInfo:
202-
name: str
203-
executor_type: str
204-
version: str | None
205-
206-
@classmethod
207-
def from_source(cls, source: Source | str) -> SourceTelemetryInfo:
208-
if isinstance(source, str):
209-
return cls(
210-
name=str(source),
211-
executor_type=UNKNOWN,
212-
version=UNKNOWN,
213-
)
214-
215-
# Else, `source` should be a `Source` object at this point
216-
return cls(
217-
name=source.name,
218-
executor_type=type(source.executor).__name__,
219-
version=source.executor.reported_version,
220-
)
221-
222-
223-
@dataclass
224-
class DestinationTelemetryInfo:
225-
name: str
226-
executor_type: str
227-
version: str | None
228-
229-
@classmethod
230-
def from_destination(
231-
cls,
232-
destination: Destination | AirbyteWriterInterface | str | None,
233-
) -> DestinationTelemetryInfo:
234-
if not destination:
235-
return cls(name=UNKNOWN, executor_type=UNKNOWN, version=UNKNOWN)
236-
237-
if isinstance(destination, str):
238-
return cls(name=destination, executor_type=UNKNOWN, version=UNKNOWN)
239-
240-
if hasattr(destination, "executor"):
241-
return cls(
242-
name=destination.name,
243-
executor_type=type(destination.executor).__name__,
244-
version=destination.executor.reported_version,
245-
)
246-
247-
return cls(
248-
name=repr(destination),
249-
executor_type=UNKNOWN,
250-
version=UNKNOWN,
251-
)
252-
253-
254-
def one_way_hash(
255-
string_to_hash: Any, # noqa: ANN401 # Allow Any type
256-
/,
257-
) -> str:
258-
"""Return a one-way hash of the given string.
259-
260-
To ensure a unique domain of hashes, we prepend a seed to the string before hashing.
261-
"""
262-
return hashlib.sha256((HASH_SEED + str(string_to_hash)).encode()).hexdigest()
263-
264-
265180
@lru_cache
266181
def get_env_flags() -> dict[str, Any]:
267182
flags: dict[str, bool | str] = {
@@ -283,9 +198,9 @@ def get_env_flags() -> dict[str, Any]:
283198

284199
def send_telemetry(
285200
*,
286-
source: Source | str | None,
287-
destination: Destination | AirbyteWriterInterface | str | None,
288-
cache: CacheBase | None,
201+
source: ConnectorRuntimeInfo | None,
202+
destination: ConnectorRuntimeInfo | None,
203+
cache: WriterRuntimeInfo | None,
289204
state: EventState,
290205
event_type: EventType,
291206
number_of_records: int | None = None,
@@ -297,8 +212,6 @@ def send_telemetry(
297212

298213
payload_props: dict[str, str | int | dict] = {
299214
"session_id": PYAIRBYTE_SESSION_ID,
300-
"cache": asdict(CacheTelemetryInfo.from_cache(cache)),
301-
"destination": asdict(DestinationTelemetryInfo.from_destination(destination)),
302215
"state": state,
303216
"version": get_version(),
304217
"python_version": meta.get_python_version(),
@@ -308,7 +221,13 @@ def send_telemetry(
308221
}
309222

310223
if source:
311-
payload_props["source"] = asdict(SourceTelemetryInfo.from_source(source))
224+
payload_props["source"] = source.to_dict()
225+
226+
if destination:
227+
payload_props["destination"] = destination.to_dict()
228+
229+
if cache:
230+
payload_props["cache"] = cache.to_dict()
312231

313232
if exception:
314233
if isinstance(exception, exc.AirbyteError):
@@ -345,8 +264,8 @@ def log_config_validation_result(
345264
treated as a source name.
346265
"""
347266
send_telemetry(
348-
source=name if not name.startswith("destination-") else None,
349-
destination=name if name.startswith("destination-") else None,
267+
source=ConnectorRuntimeInfo(name=name) if not name.startswith("destination-") else None,
268+
destination=ConnectorRuntimeInfo(name=name) if name.startswith("destination-") else None,
350269
cache=None,
351270
state=state,
352271
event_type=EventType.VALIDATE,
@@ -365,8 +284,8 @@ def log_connector_check_result(
365284
treated as a source name.
366285
"""
367286
send_telemetry(
368-
source=name if not name.startswith("destination-") else None,
369-
destination=name if name.startswith("destination-") else None,
287+
source=ConnectorRuntimeInfo(name=name) if not name.startswith("destination-") else None,
288+
destination=ConnectorRuntimeInfo(name=name) if name.startswith("destination-") else None,
370289
cache=None,
371290
state=state,
372291
event_type=EventType.CHECK,
@@ -381,7 +300,7 @@ def log_install_state(
381300
) -> None:
382301
"""Log an install event."""
383302
send_telemetry(
384-
source=name,
303+
source=ConnectorRuntimeInfo(name=name),
385304
destination=None,
386305
cache=None,
387306
state=state,

0 commit comments

Comments
 (0)