"""
Script to recalculate user adoption from existing metrics in Elasticsearch
"""
from elasticsearch import Elasticsearch
from datetime import datetime
import hashlib
import math

# Connect to Elasticsearch
es = Elasticsearch(["http://localhost:9200"])

# Configuration
INDEX_USER_METRICS = "copilot_user_metrics"
INDEX_USER_ADOPTION = "copilot_user_adoption"


def generate_unique_hash(data, key_properties=None):
    """Build a deterministic SHA-256 hash from the given key properties."""
    key_elements = []
    for key_property in (key_properties or []):
        value = data.get(key_property)
        key_elements.append(str(value) if value is not None else "")
    key_string = "-".join(key_elements)
    return hashlib.sha256(key_string.encode()).hexdigest()


def _compute_percentile(sorted_values, percentile):
    """Return the linearly interpolated percentile of an already sorted list."""
    if not sorted_values:
        return 0.0
    k = (len(sorted_values) - 1) * (percentile / 100)
    lower = math.floor(k)
    upper = math.ceil(k)
    if lower == upper:
        return float(sorted_values[int(k)])
    lower_value = sorted_values[lower]
    upper_value = sorted_values[upper]
    weight_upper = k - lower
    weight_lower = upper - k
    return float(lower_value) * weight_lower + float(upper_value) * weight_upper
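# Worked example for _compute_percentile: with sorted_values = [1, 2, 3, 4]
# and percentile = 50, k = (4 - 1) * 0.5 = 1.5, so the result interpolates
# between index 1 and index 2: 2 * 0.5 + 3 * 0.5 = 2.5.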


def _robust_scale(value, lower, upper):
    """Scale value into [0, 1] between the given bounds, clamping at the edges."""
    if upper <= lower:
        # Degenerate spread (all values effectively equal): treat as full score.
        return 1.0
    return max(0.0, min(1.0, (value - lower) / (upper - lower)))
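# Worked example for _robust_scale: with bounds (2.0, 12.0) a value of 7.0
# maps to (7 - 2) / (12 - 2) = 0.5; values at or below the lower bound clamp
# to 0.0 and values at or above the upper bound clamp to 1.0.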


def fetch_user_metrics():
    """Fetch all user metrics from all organizations"""
    query = {
        "query": {
            "match_all": {}
        },
        "size": 10000,
        "sort": [{"day": {"order": "desc"}}]
    }

    result = es.search(index=INDEX_USER_METRICS, body=query)
    metrics = [hit["_source"] for hit in result["hits"]["hits"]]
    print(f"Fetched {len(metrics)} user metrics records")
    return metrics
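# Note: a single search returns at most "size" hits (10,000 here, which is also
# Elasticsearch's default index.max_result_window). If the metrics index grows
# beyond that, a paginated fetch (e.g. search_after or the
# elasticsearch.helpers.scan helper) would be needed to cover every record.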


def build_user_adoption_leaderboard(metrics_data, organization_slug, top_n=10):
    """Calculate adoption scores from metrics data (same logic as main.py)"""
    if not metrics_data:
        return []

    grouped = {}
    report_start_days = set()
    report_end_days = set()

    for record in metrics_data:
        login = record.get("user_login") or "unknown"
        entry = grouped.setdefault(login, {
            "events_logged": 0,
            "volume": 0,
            "code_generation": 0,
            "code_acceptance": 0,
            "loc_added": 0,
            "loc_suggested": 0,
            "agent_usage": 0,
            "chat_usage": 0,
            "days": set(),
        })

        entry["events_logged"] += 1
        entry["volume"] += record.get("user_initiated_interaction_count", 0)
        entry["code_generation"] += record.get("code_generation_activity_count", 0)
        entry["code_acceptance"] += record.get("code_acceptance_activity_count", 0)
        entry["loc_added"] += record.get("loc_added_sum", 0)
        entry["loc_suggested"] += record.get("loc_suggested_to_add_sum", 0)
        if record.get("used_agent"):
            entry["agent_usage"] += 1
        if record.get("used_chat"):
            entry["chat_usage"] += 1
        day_val = record.get("day")
        if day_val:
            entry["days"].add(day_val)

        start_day = record.get("report_start_day")
        if start_day:
            report_start_days.add(start_day)
        end_day = record.get("report_end_day")
        if end_day:
            report_end_days.add(end_day)

    global_start_day = min(report_start_days) if report_start_days else None
    global_end_day = max(report_end_days) if report_end_days else None

    summaries = []
    for login, stats in grouped.items():
        active_days = len(stats["days"])
        interaction_per_day = (
            stats["volume"] / active_days if active_days else 0.0
        )
        acceptance_rate = (
            stats["code_acceptance"] / stats["code_generation"]
            if stats["code_generation"]
            else 0.0
        )
        average_loc_added = (
            stats["loc_added"] / active_days if active_days else 0.0
        )
        feature_breadth = stats["agent_usage"] + stats["chat_usage"]

        # Stamp a day for Grafana time filtering
        stamped_day = (
            global_end_day if global_end_day else datetime.utcnow().strftime("%Y-%m-%d")
        )

        summary = {
            "user_login": login,
            "organization_slug": organization_slug,
            "slug_type": "Standalone",
            "events_logged": stats["events_logged"],
            "volume": stats["volume"],
            "code_generation_activity_count": stats["code_generation"],
            "code_acceptance_activity_count": stats["code_acceptance"],
            "loc_added_sum": stats["loc_added"],
            "loc_suggested_to_add_sum": stats["loc_suggested"],
            "average_loc_added": average_loc_added,
            "interactions_per_day": interaction_per_day,
            "acceptance_rate": acceptance_rate,
            "feature_breadth": feature_breadth,
            "agent_usage": stats["agent_usage"],
            "chat_usage": stats["chat_usage"],
            "active_days": active_days,
            "report_start_day": global_start_day,
            "report_end_day": global_end_day,
            "day": stamped_day,
            "bucket_type": "user",
            "is_top10": False,
            "rank": None,
        }

        summary["unique_hash"] = generate_unique_hash(
            summary,
            key_properties=[
                "organization_slug",
                "user_login",
                "report_start_day",
                "report_end_day",
                "bucket_type",
            ],
        )

        summaries.append(summary)

    if not summaries:
        return []

    # Calculate percentile bounds for robust scaling
    signals = {
        "volume": [entry["volume"] for entry in summaries],
        "interactions_per_day": [entry["interactions_per_day"] for entry in summaries],
        "acceptance_rate": [entry["acceptance_rate"] for entry in summaries],
        "average_loc_added": [entry["average_loc_added"] for entry in summaries],
        "feature_breadth": [entry["feature_breadth"] for entry in summaries],
    }

    bounds = {}
    for key, values in signals.items():
        sorted_values = sorted(values)
        lower = _compute_percentile(sorted_values, 5)
        upper = _compute_percentile(sorted_values, 95)
        bounds[key] = (lower, upper)
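    # Scoring recipe for the loops below: the five signals are robust-scaled to
    # [0, 1] and averaged with equal 20% weights into a base score, a consistency
    # bonus of up to 10% (proportional to active days) multiplies it, and the
    # result is normalized so the highest-scoring user lands at 100%.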
    # Calculate base scores with normalized components
    for entry in summaries:
        norm_volume = _robust_scale(entry["volume"], *bounds["volume"])
        norm_interactions = _robust_scale(
            entry["interactions_per_day"], *bounds["interactions_per_day"]
        )
        norm_acceptance = _robust_scale(
            entry["acceptance_rate"], *bounds["acceptance_rate"]
        )
        norm_loc_added = _robust_scale(
            entry["average_loc_added"], *bounds["average_loc_added"]
        )
        norm_feature = _robust_scale(
            entry["feature_breadth"], *bounds["feature_breadth"]
        )

        base_score = (
            0.2 * norm_volume
            + 0.2 * norm_interactions
            + 0.2 * norm_acceptance
            + 0.2 * norm_loc_added
            + 0.2 * norm_feature
        )
        entry["_base_score"] = base_score

    # Add consistency bonus
    max_active_days = max(entry["active_days"] for entry in summaries)
    for entry in summaries:
        bonus = 0.1 * (entry["active_days"] / max_active_days) if max_active_days else 0.0
        bonus = min(bonus, 0.1)
        entry["consistency_bonus"] = bonus
        entry["adoption_score"] = entry["_base_score"] * (1 + bonus)

    # Convert to percentage (0-100)
    max_score = max(entry["adoption_score"] for entry in summaries)
    for entry in summaries:
        entry["adoption_pct"] = (
            round(entry["adoption_score"] / max_score * 100, 1)
            if max_score
            else 0.0
        )

    # Sort and mark top 10
    summaries.sort(key=lambda e: e["adoption_pct"], reverse=True)
    leaderboard = summaries[:top_n]
    for rank, entry in enumerate(leaderboard, start=1):
        entry["rank"] = rank
        entry["is_top10"] = True

    entries = []
    for entry in leaderboard:
        entry["bucket_type"] = "user"
        entries.append(entry)
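    # The "Others" row below sums the absolute counts across the remaining users
    # but averages the per-user rates and scores, so it reflects a typical
    # non-top-10 user rather than a pooled total.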
    # Create "Others" aggregate
    others = summaries[top_n:]
    if others:
        others_count = len(others)
        stamped_day = (
            global_end_day if global_end_day else datetime.utcnow().strftime("%Y-%m-%d")
        )

        others_entry = {
            "user_login": "Others",
            "organization_slug": organization_slug,
            "slug_type": "Standalone",
            "events_logged": sum(o["events_logged"] for o in others),
            "volume": sum(o["volume"] for o in others),
            "code_generation_activity_count": sum(
                o["code_generation_activity_count"] for o in others
            ),
            "code_acceptance_activity_count": sum(
                o["code_acceptance_activity_count"] for o in others
            ),
            "loc_added_sum": sum(o["loc_added_sum"] for o in others),
            "loc_suggested_to_add_sum": sum(
                o["loc_suggested_to_add_sum"] for o in others
            ),
            "average_loc_added": sum(o["average_loc_added"] for o in others) / others_count,
            "interactions_per_day": sum(
                o["interactions_per_day"] for o in others
            )
            / others_count,
            "acceptance_rate": sum(o["acceptance_rate"] for o in others) / others_count,
            "feature_breadth": sum(o["feature_breadth"] for o in others) / others_count,
            "agent_usage": sum(o["agent_usage"] for o in others),
            "chat_usage": sum(o["chat_usage"] for o in others),
            "active_days": sum(o["active_days"] for o in others),
            "report_start_day": global_start_day,
            "report_end_day": global_end_day,
            "day": stamped_day,
            "bucket_type": "others",
            "is_top10": False,
            "rank": None,
            "others_count": others_count,
            "consistency_bonus": 0.0,
        }

        others_entry["adoption_score"] = (
            sum(o["adoption_score"] for o in others) / others_count
        )
        score_scale = max_score if max_score else 1
        others_entry["adoption_pct"] = round(
            others_entry["adoption_score"] / score_scale * 100, 1
        )
        others_entry["unique_hash"] = generate_unique_hash(
            others_entry,
            key_properties=[
                "organization_slug",
                "user_login",
                "report_start_day",
                "report_end_day",
                "bucket_type",
            ],
        )
        entries.append(others_entry)

    # Clean up internal fields
    for entry in entries:
        entry.pop("_base_score", None)

    return entries


def write_to_adoption_index(adoption_entries):
    """Write adoption entries to Elasticsearch"""
    print(f"Writing {len(adoption_entries)} adoption entries to {INDEX_USER_ADOPTION}...")

    for entry in adoption_entries:
        # Add @timestamp for Grafana time filtering
        entry["@timestamp"] = datetime.utcnow().isoformat()

        # Use unique_hash as the document ID so re-runs overwrite existing
        # entries instead of creating duplicates
        doc_id = entry["unique_hash"]

        try:
            es.index(index=INDEX_USER_ADOPTION, id=doc_id, document=entry)
            print(f" ✓ {entry['user_login']}: {entry['adoption_pct']}%")
        except Exception as e:
            print(f" ✗ Failed to write {entry['user_login']}: {e}")

    print(f"Successfully wrote {len(adoption_entries)} adoption entries")
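# Note: for large numbers of entries, elasticsearch.helpers.bulk could batch
# these writes; the per-document es.index() calls above keep the per-user log
# output simple.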


def main():
    print("="*60)
    print("Recalculating User Adoption from Existing Metrics")
    print("="*60)

    # Fetch all user metrics
    all_metrics = fetch_user_metrics()

    if not all_metrics:
        print("No metrics data found. Cannot calculate adoption.")
        return

    # Group by organization
    orgs = {}
    for metric in all_metrics:
        org = metric.get("organization_slug", "unknown")
        if org not in orgs:
            orgs[org] = []
        orgs[org].append(metric)

    print(f"Found {len(orgs)} organizations: {', '.join(orgs.keys())}")

    # Calculate adoption for each organization
    all_adoption_entries = []
    for org_slug, metrics_data in orgs.items():
        print(f"\nProcessing {org_slug}...")
        adoption_entries = build_user_adoption_leaderboard(
            metrics_data,
            org_slug,
            top_n=10
        )
        all_adoption_entries.extend(adoption_entries)

    if not all_adoption_entries:
        print("No adoption entries generated.")
        return

    # Write all to Elasticsearch
    write_to_adoption_index(all_adoption_entries)

    print("="*60)
    print("✓ Adoption data regenerated successfully!")
    print(f"  Total entries: {len(all_adoption_entries)}")
    print(f"  Organizations: {len(orgs)}")
    print("="*60)


if __name__ == "__main__":
    main()