Skip to content

Commit 384173c

Browse files
committed
Fix collection bug, add sorting
1 parent f00c1e4 commit 384173c

File tree

4 files changed

+297
-40
lines changed

4 files changed

+297
-40
lines changed

scripts/collect_coder_analytics.py

Lines changed: 170 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@
1111
"""
1212

1313
import json
14+
import os
1415
import subprocess
1516
import sys
1617
from datetime import UTC, datetime
1718
from typing import Any
1819

20+
import requests
21+
1922

2023
try:
2124
from google.cloud import firestore, storage
@@ -44,10 +47,132 @@ def run_command(cmd: list[str]) -> Any:
4447
sys.exit(1)
4548

4649

50+
def get_coder_api_config() -> tuple[str, str]:
    """Get Coder API URL and session token from environment.

    The base URL is read from ``CODER_URL``; when that variable is unset or
    blank the default deployment URL is used. Trailing slashes are removed so
    callers can append ``/api/v2/...`` paths without producing ``//``.

    The token is read from ``CODER_TOKEN`` and, when that is unset or blank,
    from ``CODER_SESSION_TOKEN``. Surrounding whitespace is stripped from the
    token. If no token is available the process exits with status 1.

    Returns
    -------
    tuple[str, str]
        Tuple of (api_url, session_token)
    """
    default_url = "https://platform.vectorinstitute.ai"

    # A blank CODER_URL is treated like an unset one instead of yielding "".
    api_url = (os.getenv("CODER_URL") or "").strip().rstrip("/") or default_url

    # Tokens injected from secret files often carry a trailing newline;
    # strip it so it is not passed on verbatim.
    session_token = (os.getenv("CODER_TOKEN") or "").strip() or (
        os.getenv("CODER_SESSION_TOKEN") or ""
    ).strip()
    if not session_token:
        print("Error: CODER_TOKEN or CODER_SESSION_TOKEN environment variable not set")
        sys.exit(1)

    return api_url, session_token
68+
69+
70+
def fetch_workspace_builds(
    workspace_id: str, api_url: str, session_token: str
) -> list[dict[str, Any]]:
    """Fetch all builds for a workspace using the Coder API.

    Issues a single GET to ``/api/v2/workspaces/{workspace_id}/builds`` with
    a 30 second timeout. This is best-effort: any request failure, HTTP error
    status, undecodable body, or unexpected payload shape is reported as a
    warning and an empty list is returned instead of raising.

    Parameters
    ----------
    workspace_id : str
        The UUID of the workspace
    api_url : str
        The Coder API base URL (a trailing slash is tolerated)
    session_token : str
        The Coder session token for authentication

    Returns
    -------
    list[dict[str, Any]]
        List of build objects for this workspace, or ``[]`` on failure
    """
    # Strip trailing slashes so the joined URL never contains "//api".
    url = f"{api_url.rstrip('/')}/api/v2/workspaces/{workspace_id}/builds"
    headers = {"Coder-Session-Token": session_token}

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        payload = response.json()
    # ValueError covers JSON decode failures on requests versions whose
    # decode error does not derive from RequestException.
    except (requests.RequestException, ValueError) as e:
        print(f"Warning: Failed to fetch builds for workspace {workspace_id}: {e}")
        return []

    # Callers iterate the result as a list of build dicts; refuse anything else.
    if not isinstance(payload, list):
        print(
            f"Warning: Failed to fetch builds for workspace {workspace_id}: "
            f"unexpected response type {type(payload).__name__}"
        )
        return []

    return payload
99+
100+
101+
def calculate_build_usage_hours(build: dict[str, Any]) -> float:
102+
"""Calculate usage hours for a single build based on agent connection times.
103+
104+
Parameters
105+
----------
106+
build : dict[str, Any]
107+
A build object containing resources and agents
108+
109+
Returns
110+
-------
111+
float
112+
Usage hours for this build (0 if no valid connection data)
113+
"""
114+
try:
115+
resources = build.get("resources", [])
116+
earliest_connection = None
117+
latest_connection = None
118+
119+
for resource in resources:
120+
agents = resource.get("agents", [])
121+
for agent in agents:
122+
first_connected = agent.get("first_connected_at")
123+
last_connected = agent.get("last_connected_at")
124+
125+
if first_connected:
126+
first_dt = datetime.fromisoformat(
127+
first_connected.replace("Z", "+00:00")
128+
)
129+
if earliest_connection is None or first_dt < earliest_connection:
130+
earliest_connection = first_dt
131+
132+
if last_connected:
133+
last_dt = datetime.fromisoformat(
134+
last_connected.replace("Z", "+00:00")
135+
)
136+
if latest_connection is None or last_dt > latest_connection:
137+
latest_connection = last_dt
138+
139+
# Calculate hours between first and last connection
140+
if earliest_connection and latest_connection:
141+
delta = latest_connection - earliest_connection
142+
return delta.total_seconds() / 3600.0
143+
144+
return 0.0
145+
except Exception as e:
146+
print(f"Warning: Error calculating build usage hours: {e}")
147+
return 0.0
148+
149+
150+
def calculate_workspace_total_usage(builds: list[dict[str, Any]]) -> float:
    """Add up the usage hours of every build belonging to one workspace.

    Parameters
    ----------
    builds : list[dict[str, Any]]
        Build objects for a single workspace

    Returns
    -------
    float
        Hours accumulated over all builds; 0.0 when the list is empty
    """
    accumulated = 0.0
    # Explicit running total keeps plain left-to-right float addition.
    for build_hours in map(calculate_build_usage_hours, builds):
        accumulated += build_hours
    return accumulated
167+
168+
47169
def get_team_mappings() -> dict[str, str]:
48170
"""Get team mappings from Firestore.
49171
50-
Returns a dict mapping github_handle (lowercase) -> team_name.
172+
Returns
173+
-------
174+
dict[str, str]
175+
Mapping of github_handle (lowercase) -> team_name
51176
"""
52177
print("Fetching team mappings from Firestore...")
53178

@@ -70,8 +195,25 @@ def get_team_mappings() -> dict[str, str]:
70195
return mappings
71196

72197

73-
def fetch_workspaces(team_mappings: dict[str, str]) -> list[dict[str, Any]]:
74-
"""Fetch all workspaces using Coder CLI and filter out excluded teams."""
198+
def fetch_workspaces(
199+
team_mappings: dict[str, str], api_url: str, session_token: str
200+
) -> list[dict[str, Any]]:
201+
"""Fetch all workspaces using Coder CLI and enrich with build history.
202+
203+
Parameters
204+
----------
205+
team_mappings : dict[str, str]
206+
Mapping of github_handle -> team_name
207+
api_url : str
208+
Coder API base URL
209+
session_token : str
210+
Coder session token for API authentication
211+
212+
Returns
213+
-------
214+
list[dict[str, Any]]
215+
List of workspace objects with builds and usage hours
216+
"""
75217
print("Fetching workspaces from Coder...")
76218
workspaces = run_command(["coder", "list", "-a", "-o", "json"])
77219

@@ -96,6 +238,25 @@ def fetch_workspaces(team_mappings: dict[str, str]) -> list[dict[str, Any]]:
96238
)
97239

98240
print(f"✓ Fetched {len(filtered_workspaces)} workspaces")
241+
242+
# Enrich workspaces with full build history and usage hours
243+
print("Enriching workspaces with build history...")
244+
for i, workspace in enumerate(filtered_workspaces, 1):
245+
workspace_id = workspace.get("id")
246+
if workspace_id:
247+
# Fetch all builds for this workspace
248+
builds = fetch_workspace_builds(workspace_id, api_url, session_token)
249+
workspace["all_builds"] = builds
250+
251+
# Calculate total usage hours across all builds
252+
total_usage_hours = calculate_workspace_total_usage(builds)
253+
workspace["total_usage_hours"] = round(total_usage_hours, 2)
254+
255+
# Progress indicator
256+
if i % 10 == 0:
257+
print(f" Processed {i}/{len(filtered_workspaces)} workspaces...")
258+
259+
print(f"✓ Enriched {len(filtered_workspaces)} workspaces with build history")
99260
return filtered_workspaces
100261

101262

@@ -202,11 +363,15 @@ def main() -> None:
202363
bucket_name = "coder-analytics-snapshots"
203364
save_local = "--local" in sys.argv
204365

366+
# Get Coder API configuration
367+
api_url, session_token = get_coder_api_config()
368+
print(f"✓ Using Coder API: {api_url}")
369+
205370
# Fetch team mappings first
206371
team_mappings = get_team_mappings()
207372

208-
# Fetch data (with filtering)
209-
workspaces = fetch_workspaces(team_mappings)
373+
# Fetch data (with filtering and build enrichment)
374+
workspaces = fetch_workspaces(team_mappings, api_url, session_token)
210375
templates = fetch_templates()
211376

212377
# Create snapshot

0 commit comments

Comments
 (0)