Commit 45fbc2f

Merge pull request #54 from vanchaudhary/main
Fix Top 10 Consistent Copilot Users panel with comprehensive adoption scoring
2 parents 260b753 + 156984a
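
In outline, the scoring this PR introduces (implemented in regenerate_adoption.py below) ranks each user by

    adoption_score = base_score * (1 + consistency_bonus)
    base_score = 0.2 * (norm_volume + norm_interactions + norm_acceptance
                        + norm_loc_added + norm_feature)

where each norm_* signal is robust-scaled to [0, 1] between its 5th and 95th percentiles across the organization, the consistency bonus adds up to 10% in proportion to active days, and adoption_pct reports each score as a percentage of the organization's top scorer.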

File tree: 6 files changed (+465 additions, −228 deletions)


regenerate_adoption.py

Lines changed: 381 additions & 0 deletions
@@ -0,0 +1,381 @@
"""
Script to recalculate user adoption from existing metrics in Elasticsearch.
"""
from elasticsearch import Elasticsearch
from datetime import datetime
import hashlib
import math

# Connect to Elasticsearch
es = Elasticsearch(["http://localhost:9200"])

# Configuration
INDEX_USER_METRICS = "copilot_user_metrics"
INDEX_USER_ADOPTION = "copilot_user_adoption"


def generate_unique_hash(data, key_properties=()):
    """Build a deterministic SHA-256 document ID from the given key properties."""
    key_elements = []
    for key_property in key_properties:
        value = data.get(key_property)
        key_elements.append(str(value) if value is not None else "")
    key_string = "-".join(key_elements)
    unique_hash = hashlib.sha256(key_string.encode()).hexdigest()
    return unique_hash


def _compute_percentile(sorted_values, percentile):
    """Linearly interpolated percentile over an already-sorted list."""
    if not sorted_values:
        return 0.0
    k = (len(sorted_values) - 1) * (percentile / 100)
    lower = math.floor(k)
    upper = math.ceil(k)
    if lower == upper:
        return float(sorted_values[int(k)])
    lower_value = sorted_values[lower]
    upper_value = sorted_values[upper]
    weight_upper = k - lower
    weight_lower = upper - k
    return float(lower_value) * weight_lower + float(upper_value) * weight_upper


def _robust_scale(value, lower, upper):
    """Clamp value into [0, 1] relative to the given percentile bounds."""
    if upper <= lower:
        return 1.0
    return max(0.0, min(1.0, (value - lower) / (upper - lower)))


def fetch_user_metrics():
    """Fetch all user metrics from all organizations."""
    query = {
        "query": {
            "match_all": {}
        },
        "size": 10000,  # a single search returns at most 10,000 hits
        "sort": [{"day": {"order": "desc"}}]
    }

    result = es.search(index=INDEX_USER_METRICS, body=query)
    metrics = [hit["_source"] for hit in result["hits"]["hits"]]
    print(f"Fetched {len(metrics)} user metrics records")
    return metrics


def build_user_adoption_leaderboard(metrics_data, organization_slug, top_n=10):
    """Calculate adoption scores from metrics data (same logic as main.py)."""
    if not metrics_data:
        return []

    grouped = {}
    report_start_days = set()
    report_end_days = set()

    for record in metrics_data:
        login = record.get("user_login") or "unknown"
        entry = grouped.setdefault(login, {
            "events_logged": 0,
            "volume": 0,
            "code_generation": 0,
            "code_acceptance": 0,
            "loc_added": 0,
            "loc_suggested": 0,
            "agent_usage": 0,
            "chat_usage": 0,
            "days": set(),
        })

        entry["events_logged"] += 1
        entry["volume"] += record.get("user_initiated_interaction_count", 0)
        entry["code_generation"] += record.get("code_generation_activity_count", 0)
        entry["code_acceptance"] += record.get("code_acceptance_activity_count", 0)
        entry["loc_added"] += record.get("loc_added_sum", 0)
        entry["loc_suggested"] += record.get("loc_suggested_to_add_sum", 0)
        if record.get("used_agent"):
            entry["agent_usage"] += 1
        if record.get("used_chat"):
            entry["chat_usage"] += 1
        day_val = record.get("day")
        if day_val:
            entry["days"].add(day_val)

        start_day = record.get("report_start_day")
        if start_day:
            report_start_days.add(start_day)
        end_day = record.get("report_end_day")
        if end_day:
            report_end_days.add(end_day)

    global_start_day = min(report_start_days) if report_start_days else None
    global_end_day = max(report_end_days) if report_end_days else None

    summaries = []
    for login, stats in grouped.items():
        active_days = len(stats["days"])
        interaction_per_day = (
            stats["volume"] / active_days if active_days else 0.0
        )
        acceptance_rate = (
            stats["code_acceptance"] / stats["code_generation"]
            if stats["code_generation"]
            else 0.0
        )
        average_loc_added = (
            stats["loc_added"] / active_days if active_days else 0.0
        )
        feature_breadth = stats["agent_usage"] + stats["chat_usage"]

        # Stamp a day for Grafana time filtering
        stamped_day = (
            global_end_day if global_end_day else datetime.utcnow().strftime("%Y-%m-%d")
        )

        summary = {
            "user_login": login,
            "organization_slug": organization_slug,
            "slug_type": "Standalone",
            "events_logged": stats["events_logged"],
            "volume": stats["volume"],
            "code_generation_activity_count": stats["code_generation"],
            "code_acceptance_activity_count": stats["code_acceptance"],
            "loc_added_sum": stats["loc_added"],
            "loc_suggested_to_add_sum": stats["loc_suggested"],
            "average_loc_added": average_loc_added,
            "interactions_per_day": interaction_per_day,
            "acceptance_rate": acceptance_rate,
            "feature_breadth": feature_breadth,
            "agent_usage": stats["agent_usage"],
            "chat_usage": stats["chat_usage"],
            "active_days": active_days,
            "report_start_day": global_start_day,
            "report_end_day": global_end_day,
            "day": stamped_day,
            "bucket_type": "user",
            "is_top10": False,
            "rank": None,
        }

        summary["unique_hash"] = generate_unique_hash(
            summary,
            key_properties=[
                "organization_slug",
                "user_login",
                "report_start_day",
                "report_end_day",
                "bucket_type",
            ],
        )

        summaries.append(summary)

    if not summaries:
        return []

    # Calculate percentile bounds for robust scaling
    signals = {
        "volume": [entry["volume"] for entry in summaries],
        "interactions_per_day": [entry["interactions_per_day"] for entry in summaries],
        "acceptance_rate": [entry["acceptance_rate"] for entry in summaries],
        "average_loc_added": [entry["average_loc_added"] for entry in summaries],
        "feature_breadth": [entry["feature_breadth"] for entry in summaries],
    }

    bounds = {}
    for key, values in signals.items():
        sorted_values = sorted(values)
        lower = _compute_percentile(sorted_values, 5)
        upper = _compute_percentile(sorted_values, 95)
        bounds[key] = (lower, upper)

    # Calculate base scores with normalized components
    for entry in summaries:
        norm_volume = _robust_scale(entry["volume"], *bounds["volume"])
        norm_interactions = _robust_scale(
            entry["interactions_per_day"], *bounds["interactions_per_day"]
        )
        norm_acceptance = _robust_scale(
            entry["acceptance_rate"], *bounds["acceptance_rate"]
        )
        norm_loc_added = _robust_scale(
            entry["average_loc_added"], *bounds["average_loc_added"]
        )
        norm_feature = _robust_scale(
            entry["feature_breadth"], *bounds["feature_breadth"]
        )

        base_score = (
            0.2 * norm_volume
            + 0.2 * norm_interactions
            + 0.2 * norm_acceptance
            + 0.2 * norm_loc_added
            + 0.2 * norm_feature
        )
        entry["_base_score"] = base_score

    # Add consistency bonus (at most +10%, proportional to active days)
    max_active_days = max(entry["active_days"] for entry in summaries)
    for entry in summaries:
        bonus = 0.1 * (entry["active_days"] / max_active_days) if max_active_days else 0.0
        bonus = min(bonus, 0.1)
        entry["consistency_bonus"] = bonus
        entry["adoption_score"] = entry["_base_score"] * (1 + bonus)

    # Convert to percentage (0-100), relative to the organization's top scorer
    max_score = max(entry["adoption_score"] for entry in summaries)
    for entry in summaries:
        entry["adoption_pct"] = (
            round(entry["adoption_score"] / max_score * 100, 1)
            if max_score
            else 0.0
        )

    # Sort and mark top 10
    summaries.sort(key=lambda e: e["adoption_pct"], reverse=True)
    leaderboard = summaries[:top_n]
    for rank, entry in enumerate(leaderboard, start=1):
        entry["rank"] = rank
        entry["is_top10"] = True

    entries = []
    for entry in leaderboard:
        entry["bucket_type"] = "user"
        entries.append(entry)

    # Create "Others" aggregate
    others = summaries[top_n:]
    if others:
        others_count = len(others)
        stamped_day = (
            global_end_day if global_end_day else datetime.utcnow().strftime("%Y-%m-%d")
        )

        others_entry = {
            "user_login": "Others",
            "organization_slug": organization_slug,
            "slug_type": "Standalone",
            "events_logged": sum(o["events_logged"] for o in others),
            "volume": sum(o["volume"] for o in others),
            "code_generation_activity_count": sum(
                o["code_generation_activity_count"] for o in others
            ),
            "code_acceptance_activity_count": sum(
                o["code_acceptance_activity_count"] for o in others
            ),
            "loc_added_sum": sum(o["loc_added_sum"] for o in others),
            "loc_suggested_to_add_sum": sum(
                o["loc_suggested_to_add_sum"] for o in others
            ),
            "average_loc_added": sum(o["average_loc_added"] for o in others) / others_count,
            "interactions_per_day": sum(
                o["interactions_per_day"] for o in others
            )
            / others_count,
            "acceptance_rate": sum(o["acceptance_rate"] for o in others) / others_count,
            "feature_breadth": sum(o["feature_breadth"] for o in others) / others_count,
            "agent_usage": sum(o["agent_usage"] for o in others),
            "chat_usage": sum(o["chat_usage"] for o in others),
            "active_days": sum(o["active_days"] for o in others),
            "report_start_day": global_start_day,
            "report_end_day": global_end_day,
            "day": stamped_day,
            "bucket_type": "others",
            "is_top10": False,
            "rank": None,
            "others_count": others_count,
            "consistency_bonus": 0.0,
        }

        others_entry["adoption_score"] = (
            sum(o["adoption_score"] for o in others) / others_count
        )
        score_scale = max_score if max_score else 1
        others_entry["adoption_pct"] = round(
            others_entry["adoption_score"] / score_scale * 100, 1
        )
        others_entry["unique_hash"] = generate_unique_hash(
            others_entry,
            key_properties=[
                "organization_slug",
                "user_login",
                "report_start_day",
                "report_end_day",
                "bucket_type",
            ],
        )
        entries.append(others_entry)

    # Clean up internal fields
    for entry in entries:
        entry.pop("_base_score", None)

    return entries


def write_to_adoption_index(adoption_entries):
    """Write adoption entries to Elasticsearch."""
    print(f"Writing {len(adoption_entries)} adoption entries to {INDEX_USER_ADOPTION}...")

    for entry in adoption_entries:
        # Add @timestamp for Grafana time filtering
        entry["@timestamp"] = datetime.utcnow().isoformat()

        # Use unique_hash as document ID so reruns upsert instead of duplicating
        doc_id = entry["unique_hash"]

        try:
            es.index(index=INDEX_USER_ADOPTION, id=doc_id, document=entry)
            print(f"  ✓ {entry['user_login']}: {entry['adoption_pct']}%")
        except Exception as e:
            print(f"  ✗ Failed to write {entry['user_login']}: {e}")

    print(f"Finished writing {len(adoption_entries)} adoption entries")


def main():
    print("="*60)
    print("Recalculating User Adoption from Existing Metrics")
    print("="*60)

    # Fetch all user metrics
    all_metrics = fetch_user_metrics()

    if not all_metrics:
        print("No metrics data found. Cannot calculate adoption.")
        return

    # Group by organization
    orgs = {}
    for metric in all_metrics:
        org = metric.get("organization_slug", "unknown")
        if org not in orgs:
            orgs[org] = []
        orgs[org].append(metric)

    print(f"Found {len(orgs)} organizations: {', '.join(orgs.keys())}")

    # Calculate adoption for each organization
    all_adoption_entries = []
    for org_slug, metrics_data in orgs.items():
        print(f"\nProcessing {org_slug}...")
        adoption_entries = build_user_adoption_leaderboard(
            metrics_data,
            org_slug,
            top_n=10
        )
        all_adoption_entries.extend(adoption_entries)

    if not all_adoption_entries:
        print("No adoption entries generated.")
        return

    # Write all to Elasticsearch
    write_to_adoption_index(all_adoption_entries)

    print("="*60)
    print("✓ Adoption data regenerated successfully!")
    print(f"  Total entries: {len(all_adoption_entries)}")
    print(f"  Organizations: {len(orgs)}")
    print("="*60)


if __name__ == "__main__":
    main()
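
For intuition, here is a minimal worked example of the scoring math above, with made-up numbers. It assumes regenerate_adoption.py is importable from the working directory (the Elasticsearch client it constructs at import time does not connect until a request is made):

from regenerate_adoption import _compute_percentile, _robust_scale

# Five users' total "volume" signal, pre-sorted for the percentile helper.
volumes = [10, 20, 40, 80, 500]

lower = _compute_percentile(volumes, 5)   # 12.0, interpolated 5th percentile
upper = _compute_percentile(volumes, 95)  # 416.0, interpolated 95th percentile

# Robust scaling clamps the outlier to 1.0 instead of letting it
# compress everyone else's normalized score toward zero.
print(_robust_scale(500, lower, upper))            # 1.0
print(round(_robust_scale(80, lower, upper), 3))   # 0.168

# With all five signals normalized, the base score is their equal-weight
# (0.2 each) sum, and the consistency bonus lifts it by at most 10%.
norms = [0.168, 0.4, 0.9, 0.3, 0.5]        # volume, interactions, acceptance, loc, breadth
base_score = 0.2 * sum(norms)              # 0.4536
bonus = 0.1 * (4 / 5)                      # active on 4 of a max 5 days -> 0.08
adoption_score = base_score * (1 + bonus)  # ~0.4899
print(round(adoption_score, 4))

The 5th-95th percentile bounds are what make the scaling "robust": a single hyperactive user saturates at 1.0 rather than dragging everyone else's normalized scores toward zero.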

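The script runs as a one-shot job with python regenerate_adoption.py. After a run, the regenerated leaderboard can be spot-checked straight from Elasticsearch. A minimal sketch, reusing the field names the script writes and assuming default dynamic mappings (add a term filter on organization_slug if several orgs are indexed):

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# Fetch the ranked Top 10 entries the Grafana panel reads, best rank first.
resp = es.search(
    index="copilot_user_adoption",
    body={
        "query": {"term": {"is_top10": True}},
        "sort": [{"rank": {"order": "asc"}}],
        "size": 10,
    },
)
for hit in resp["hits"]["hits"]:
    doc = hit["_source"]
    print(doc["rank"], doc["user_login"], doc["adoption_pct"])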