Skip to content

Commit 2f483ec

Browse files
authored
Chore: Add anonymous user ID in tracking events (#124)
1 parent 1e45e42 commit 2f483ec

File tree

11 files changed

+195
-5
lines changed

11 files changed

+195
-5
lines changed

.github/workflows/autofix.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ on:
55
repository_dispatch:
66
types: [autofix-command]
77

8+
env:
9+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
10+
811
jobs:
912
python-autofix:
1013
runs-on: ubuntu-latest

.github/workflows/pydoc_preview.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ on:
66
- main
77
pull_request: {}
88

9+
env:
10+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
911

1012
jobs:
1113
preview_docs:

.github/workflows/pydoc_publish.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ on:
88
# Allows you to run this workflow manually from the Actions tab
99
workflow_dispatch:
1010

11+
env:
12+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
13+
1114
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
1215
permissions:
1316
contents: read

.github/workflows/pypi_publish.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ on:
55

66
workflow_dispatch:
77

8+
env:
9+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
10+
811
jobs:
912
build:
1013
runs-on: ubuntu-latest

.github/workflows/python_lint.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ on:
66
- main
77
pull_request: {}
88

9+
env:
10+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
11+
912
jobs:
1013
ruff-lint-check:
1114
name: Ruff Lint Check

.github/workflows/python_pytest.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ on:
1313
- main
1414
pull_request: {}
1515

16+
env:
17+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
18+
1619
jobs:
1720
pytest-fast:
1821
name: Pytest (Fast)

.github/workflows/release_drafter.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ on:
55
branches:
66
- main
77

8+
env:
9+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
10+
811
permissions:
912
contents: read
1013

.github/workflows/semantic_pr_check.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
- edited
88
- synchronize
99

10+
env:
11+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
12+
1013
permissions:
1114
pull-requests: read
1215

.github/workflows/slash_command_dispatch.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ on:
44
issue_comment:
55
types: [created]
66

7+
env:
8+
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
9+
710
jobs:
811
slashCommandDispatch:
912
runs-on: ubuntu-latest

airbyte/_util/telemetry.py

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,12 @@
3737
from dataclasses import asdict, dataclass
3838
from enum import Enum
3939
from functools import lru_cache
40-
from typing import TYPE_CHECKING, Any
40+
from pathlib import Path
41+
from typing import TYPE_CHECKING, Any, cast
4142

4243
import requests
4344
import ulid
45+
import yaml
4446

4547
from airbyte import exceptions as exc
4648
from airbyte._util import meta
@@ -52,6 +54,10 @@
5254
from airbyte.sources.base import Source
5355

5456

57+
DEBUG = True
58+
"""Enable debug mode for telemetry code."""
59+
60+
5561
HASH_SEED = "PyAirbyte:"
5662
"""Additional seed for randomizing one-way hashed strings."""
5763

@@ -73,6 +79,92 @@
7379
DO_NOT_TRACK = "DO_NOT_TRACK"
7480
"""Environment variable to opt-out of telemetry."""
7581

82+
_ENV_ANALYTICS_ID = "AIRBYTE_ANALYTICS_ID" # Allows user to override the anonymous user ID
83+
_ANALYTICS_FILE = Path.home() / ".airbyte" / "analytics.yml"
84+
_ANALYTICS_ID: str | bool | None = None
85+
86+
87+
def _setup_analytics() -> str | bool:
    """Set up the analytics file if it doesn't exist.

    Return the anonymous user ID or False if the user has opted out.

    Resolution order:

    1. If the `DO_NOT_TRACK` environment variable is set to a non-empty value,
       return `False` without touching the filesystem.
    2. If the `AIRBYTE_ANALYTICS_ID` environment variable is set, its value is
       used as the ID and is written to the analytics file when the file is
       missing or holds a different ID.
    3. Otherwise the ID stored in the analytics file is returned.
    4. If no usable ID is found, a new ULID is generated and written to the file.

    Problems reading, parsing, or writing the analytics file are collected and
    only printed when `DEBUG` is enabled; they never raise, because this
    function is invoked at module import time.
    """
    if os.environ.get(DO_NOT_TRACK):
        # User has opted out of tracking.
        return False

    issues: list[str] = []

    # If the user has chosen to override their analytics ID, use that value and
    # remember it for future invocations. `None` means no override was given.
    anonymous_user_id: str | None = os.environ.get(_ENV_ANALYTICS_ID)

    if not _ANALYTICS_FILE.exists():
        # This is a one-time message to inform the user that we are tracking anonymous usage stats.
        print(
            "Anonymous usage reporting is enabled. For more information or to opt out, please"
            " see https://docs.airbyte.io/pyairbyte/anonymized-usage-statistics"
        )
    else:
        # Pre-bind so a failed read/parse leaves a well-defined value instead of
        # an unbound local.
        analytics: Any = None
        try:
            analytics = yaml.safe_load(_ANALYTICS_FILE.read_text())
        except Exception as ex:
            # Broad on purpose: telemetry setup must never break the host program.
            # Append one message (`+=` with a string would add it character by character).
            issues.append(f"File appears corrupted. Error was: {ex!s}")

        # Only a mapping can hold the ID; a scalar or list document is treated
        # the same as a file without the key.
        if isinstance(analytics, dict) and "anonymous_user_id" in analytics:
            # The analytics ID was successfully located.
            stored_id = analytics["anonymous_user_id"]
            if not anonymous_user_id or anonymous_user_id == stored_id:
                # No override, or the override matches: no need to update the file.
                return stored_id

            issues.append("Provided analytics ID did not match the file. Rewriting the file.")
            print(
                f"Received a user-provided analytics ID override in the '{_ENV_ANALYTICS_ID}' "
                "environment variable."
            )

    # File is missing, incomplete, or stale. Create a new one.
    anonymous_user_id = anonymous_user_id or str(ulid.ULID())
    try:
        _ANALYTICS_FILE.parent.mkdir(exist_ok=True, parents=True)
        _ANALYTICS_FILE.write_text(
            "# This file is used by PyAirbyte to track anonymous usage statistics.\n"
            "# For more information or to opt out, please see\n"
            "# - https://docs.airbyte.com/operator-guides/telemetry\n"
            f"anonymous_user_id: {anonymous_user_id}\n"
        )
    except Exception:
        # Failed to create the analytics file. Likely due to a read-only filesystem.
        # The generated/overridden ID is still returned for this process.
        issues.append("Failed to write the analytics file. Check filesystem permissions.")

    if DEBUG and issues:
        details = "\n".join(issues)
        print(f"One or more issues occurred when configuring usage tracking:\n{details}")

    return anonymous_user_id
153+
154+
155+
def _get_analytics_id() -> str | None:
    """Return the anonymous analytics ID, or `None` when tracking is disabled.

    The module-level `_ANALYTICS_ID` value is used when it is already set;
    otherwise `_setup_analytics()` is called to resolve it. A `False` value
    (the opt-out marker) is reported to callers as `None`.
    """
    cached: str | bool | None = _ANALYTICS_ID
    # Only fall back to the setup routine when nothing has been resolved yet.
    analytics_id: str | bool = _setup_analytics() if cached is None else cached
    return None if analytics_id is False else cast(str, analytics_id)
164+
165+
166+
_ANALYTICS_ID = _get_analytics_id()
167+
76168

77169
class SyncState(str, Enum):
78170
STARTED = "started"
@@ -174,7 +266,7 @@ def send_telemetry(
174266
"https://api.segment.io/v1/track",
175267
auth=(PYAIRBYTE_APP_TRACKING_KEY, ""),
176268
json={
177-
"anonymousId": "airbyte-lib-user",
269+
"anonymousId": _get_analytics_id(),
178270
"event": "sync",
179271
"properties": payload_props,
180272
"timestamp": datetime.datetime.utcnow().isoformat(), # noqa: DTZ003

0 commit comments

Comments
 (0)