@@ -216,8 +216,61 @@ def fetch_user_activity_insights(
216216 return {}
217217
218218
def get_historical_participant_data(bucket_name: str) -> dict[str, dict[str, Any]]:
    """Get historical participant data from the previous snapshot.

    Reads ``latest.json`` from the given GCS bucket and collects the team and
    name fields of every entry listed under its ``"workspaces"`` key.

    Loading is best-effort: any failure (missing blob, storage error,
    malformed JSON) is reported on stdout and an empty mapping is returned
    instead of raising.

    Parameters
    ----------
    bucket_name : str
        Name of the GCS bucket containing snapshots

    Returns
    -------
    dict[str, dict[str, Any]]
        Mapping of github_handle (lowercase) -> {
            'team_name': str,
            'first_name': str | None,
            'last_name': str | None
        }
        Entries without an owner name or without a team name are omitted.
        When an owner appears more than once, the last entry wins.
    """
    print("Fetching historical participant data from previous snapshot...")

    try:
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        latest_blob = bucket.blob("latest.json")

        if not latest_blob.exists():
            print(" No previous snapshot found")
            return {}

        snapshot = json.loads(latest_blob.download_as_text())

        historical_data: dict[str, dict[str, Any]] = {}
        # ``or []`` also covers a snapshot whose "workspaces" value is null.
        for workspace in snapshot.get("workspaces") or []:
            # The key may be present with a null value, in which case
            # ``.get(key, "")`` would return None and ``.lower()`` would raise,
            # throwing away the whole snapshot through the handler below.
            owner_name = (workspace.get("owner_name") or "").lower()
            team_name = workspace.get("team_name")

            # Only store if we have actual data (not null/None); an entry with
            # no owner cannot be keyed meaningfully, so it is skipped as well.
            if not owner_name or not team_name:
                continue

            historical_data[owner_name] = {
                "team_name": team_name,
                "first_name": workspace.get("owner_first_name"),
                "last_name": workspace.get("owner_last_name"),
            }

        print(f"✓ Loaded historical data for {len(historical_data)} participants")
        return historical_data
    except Exception as e:
        # Deliberately broad: historical data is optional enrichment, so a
        # failure here must not abort the caller.
        print(f" Warning: Could not load historical data: {e} ")
        return {}
270+
271+
219272def get_participant_mappings () -> dict [str , dict [str , Any ]]:
220- """Get participant data from Firestore including team and name info .
273+ """Get current participant data from Firestore.
221274
222275 Returns
223276 -------
@@ -228,7 +281,7 @@ def get_participant_mappings() -> dict[str, dict[str, Any]]:
228281 'last_name': str | None
229282 }
230283 """
231- print ("Fetching participant data from Firestore..." )
284+ print ("Fetching current participant data from Firestore..." )
232285
233286 project_id = "coderd"
234287 database_id = "onboarding"
@@ -248,10 +301,48 @@ def get_participant_mappings() -> dict[str, dict[str, Any]]:
248301 "last_name" : data .get ("last_name" ),
249302 }
250303
251- print (f"✓ Loaded { len (mappings )} participant mappings" )
304+ print (f"✓ Loaded { len (mappings )} current participant mappings" )
252305 return mappings
253306
254307
def merge_participant_data(
    historical_data: dict[str, dict[str, Any]], current_data: dict[str, dict[str, Any]]
) -> dict[str, dict[str, Any]]:
    """Merge historical and current participant data, preserving history.

    Current data takes precedence for every handle present in both sources.
    Historical entries are kept only for handles absent from ``current_data``,
    so participants that are no longer in the current data keep their last
    known team assignment.

    Parameters
    ----------
    historical_data : dict[str, dict[str, Any]]
        Historical participant data from previous snapshot
    current_data : dict[str, dict[str, Any]]
        Current participant data from Firestore

    Returns
    -------
    dict[str, dict[str, Any]]
        New mapping containing every handle from both inputs. Neither input
        mapping is modified, but the per-participant dicts are shared with
        the inputs rather than copied.
    """
    print("Merging historical and current participant data...")

    # Later unpacking wins: historical entries form the base (preserving
    # deleted participants) and current entries add to or overwrite them.
    merged = {**historical_data, **current_data}

    # Key views support set difference directly; no intermediate sets needed.
    historical_only = historical_data.keys() - current_data.keys()

    print(f"✓ Merged data: {len(merged)} total participants")
    print(f" - Historical only (deleted): {len(historical_only)} ")
    print(f" - Current (active): {len(current_data)} ")

    return merged
344+
345+
255346def fetch_workspaces (
256347 participant_mappings : dict [str , dict [str , Any ]], api_url : str , session_token : str
257348) -> list [dict [str , Any ]]:
@@ -275,7 +366,9 @@ def fetch_workspaces(
275366 workspaces = run_command (["coder" , "list" , "-a" , "-o" , "json" ])
276367
277368 # Teams to exclude from analytics
278- excluded_teams = ["facilitators" , "Unassigned" ]
369+ # NOTE: "Unassigned" is used as a fallback for participants not in Firestore
370+ # and should NOT be excluded - we want to see their workspace activity.
371+ excluded_teams = ["facilitators" ]
279372
280373 original_count = len (workspaces )
281374
@@ -469,8 +562,11 @@ def main() -> None:
469562 api_url , session_token = get_coder_api_config ()
470563 print (f"✓ Using Coder API: { api_url } " )
471564
472- # Fetch participant mappings first
473- participant_mappings = get_participant_mappings ()
565+ # Fetch participant data from multiple sources and merge
566+ # Historical data preserves team assignments for deleted participants
567+ historical_data = get_historical_participant_data (bucket_name )
568+ current_data = get_participant_mappings ()
569+ participant_mappings = merge_participant_data (historical_data , current_data )
474570
475571 # Fetch data (with filtering and build enrichment)
476572 workspaces = fetch_workspaces (participant_mappings , api_url , session_token )
0 commit comments