Skip to content

Commit f95cc0a

Browse files
committed
Fix analytics: preserve historical team data and don't filter Unassigned
- Remove 'Unassigned' from the excluded_teams list so workspaces without Firestore data are still included in analytics
- Add historical data preservation: load team assignments from the previous snapshot before querying Firestore
- Merge historical and current data: historical data preserves deleted participants, current data updates active participants
- This ensures team assignments are preserved even after participants are removed from the onboarding database
- Fixes an issue where removing participants caused historical data loss
1 parent 3b12c17 commit f95cc0a

File tree

1 file changed

+102
-6
lines changed

1 file changed

+102
-6
lines changed

scripts/collect_coder_analytics.py

Lines changed: 102 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,61 @@ def fetch_user_activity_insights(
216216
return {}
217217

218218

219+
def get_historical_participant_data(bucket_name: str) -> dict[str, dict[str, Any]]:
    """Get historical participant data from the previous snapshot.

    Reads ``latest.json`` from the given GCS bucket and builds a participant
    mapping from the workspaces listed in it. Loading is best-effort: if the
    snapshot is missing, or anything fails while reading or parsing it, a
    message is printed and an empty mapping is returned.

    Parameters
    ----------
    bucket_name : str
        Name of the GCS bucket containing snapshots

    Returns
    -------
    dict[str, dict[str, Any]]
        Mapping of github_handle (lowercase) -> {
            'team_name': str,
            'first_name': str | None,
            'last_name': str | None
        }
        Workspaces without an owner name or without a team name are skipped.
    """
    print("Fetching historical participant data from previous snapshot...")

    try:
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        latest_blob = bucket.blob("latest.json")

        if not latest_blob.exists():
            print(" No previous snapshot found")
            return {}

        content = latest_blob.download_as_text()
        snapshot = json.loads(content)

        historical_data = {}
        # `or []` also covers a snapshot whose "workspaces" value is null.
        for workspace in snapshot.get("workspaces") or []:
            # `.get(key, "")` only defaults when the key is missing; a JSON
            # null would come back as None and break `.lower()`, which would
            # discard the entire snapshot via the handler below.
            owner_name = (workspace.get("owner_name") or "").lower()
            team_name = workspace.get("team_name")

            # Only store records that identify a participant and carry an
            # actual team assignment (not null/empty).
            if not owner_name or not team_name:
                continue

            historical_data[owner_name] = {
                "team_name": team_name,
                "first_name": workspace.get("owner_first_name"),
                "last_name": workspace.get("owner_last_name"),
            }

        print(f"✓ Loaded historical data for {len(historical_data)} participants")
        return historical_data
    except Exception as e:
        # Deliberately broad: historical data is optional, so a failure here
        # must not abort the collection run.
        print(f" Warning: Could not load historical data: {e}")
        return {}
270+
271+
219272
def get_participant_mappings() -> dict[str, dict[str, Any]]:
220-
"""Get participant data from Firestore including team and name info.
273+
"""Get current participant data from Firestore.
221274
222275
Returns
223276
-------
@@ -228,7 +281,7 @@ def get_participant_mappings() -> dict[str, dict[str, Any]]:
228281
'last_name': str | None
229282
}
230283
"""
231-
print("Fetching participant data from Firestore...")
284+
print("Fetching current participant data from Firestore...")
232285

233286
project_id = "coderd"
234287
database_id = "onboarding"
@@ -248,10 +301,48 @@ def get_participant_mappings() -> dict[str, dict[str, Any]]:
248301
"last_name": data.get("last_name"),
249302
}
250303

251-
print(f"✓ Loaded {len(mappings)} participant mappings")
304+
print(f"✓ Loaded {len(mappings)} current participant mappings")
252305
return mappings
253306

254307

308+
def merge_participant_data(
    historical_data: dict[str, dict[str, Any]], current_data: dict[str, dict[str, Any]]
) -> dict[str, dict[str, Any]]:
    """Merge historical and current participant data, preserving history.

    For a handle present in both inputs the current entry wins. Historical
    entries are kept only for handles that are absent from the current data,
    which preserves team assignments after participants are removed from
    Firestore. Neither input mapping is modified; the per-participant dicts
    in the result are the same objects as in the inputs (shallow merge).

    Parameters
    ----------
    historical_data : dict[str, dict[str, Any]]
        Historical participant data from previous snapshot
    current_data : dict[str, dict[str, Any]]
        Current participant data from Firestore

    Returns
    -------
    dict[str, dict[str, Any]]
        Merged participant data with historical preservation
    """
    print("Merging historical and current participant data...")

    # Later mapping wins on key collisions: historical entries form the base
    # (preserves deleted participants) and current entries add new
    # participants or replace existing ones.
    merged = {**historical_data, **current_data}

    # Handles known only from the previous snapshot, i.e. no longer current.
    historical_only = historical_data.keys() - current_data.keys()

    print(f"✓ Merged data: {len(merged)} total participants")
    print(f" - Historical only (deleted): {len(historical_only)}")
    print(f" - Current (active): {len(current_data)}")

    return merged
344+
345+
255346
def fetch_workspaces(
256347
participant_mappings: dict[str, dict[str, Any]], api_url: str, session_token: str
257348
) -> list[dict[str, Any]]:
@@ -275,7 +366,9 @@ def fetch_workspaces(
275366
workspaces = run_command(["coder", "list", "-a", "-o", "json"])
276367

277368
# Teams to exclude from analytics
278-
excluded_teams = ["facilitators", "Unassigned"]
369+
# NOTE: "Unassigned" is used as a fallback for participants not in Firestore
370+
# and should NOT be excluded - we want to see their workspace activity.
371+
excluded_teams = ["facilitators"]
279372

280373
original_count = len(workspaces)
281374

@@ -469,8 +562,11 @@ def main() -> None:
469562
api_url, session_token = get_coder_api_config()
470563
print(f"✓ Using Coder API: {api_url}")
471564

472-
# Fetch participant mappings first
473-
participant_mappings = get_participant_mappings()
565+
# Fetch participant data from multiple sources and merge
566+
# Historical data preserves team assignments for deleted participants
567+
historical_data = get_historical_participant_data(bucket_name)
568+
current_data = get_participant_mappings()
569+
participant_mappings = merge_participant_data(historical_data, current_data)
474570

475571
# Fetch data (with filtering and build enrichment)
476572
workspaces = fetch_workspaces(participant_mappings, api_url, session_token)

0 commit comments

Comments
 (0)