#!/usr/bin/env python
"""
Count how many users were created recently on each ServiceNow instance in the
pool (a proxy for how many WorkArena tasks were run) and log the counts to
Weights & Biases.

This reuses the instance loader and table API helper from the codebase.
"""

import logging
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Tuple

from browsergym.workarena.api.utils import table_api_call
from browsergym.workarena.instance import SNowInstance, fetch_instances

WANDB_ENTITY = "alexdrouin"
WANDB_PROJECT = "workarena-monitoring"
RUN_VERSION = "v2"  # Increment if you need to recreate runs after deletion


def _time_window(hours: int = 24) -> Tuple[str, str]:
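    """Return (start, end) UTC timestamps covering the last `hours` hours.

    For example, a call at 2024-05-02 15:30:00 UTC with hours=24 returns
    ("2024-05-01 15:30:00", "2024-05-02 15:30:00").
    """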
    end = datetime.now(timezone.utc)
    start = end - timedelta(hours=hours)
    ts_format = "%Y-%m-%d %H:%M:%S"
    return start.strftime(ts_format), end.strftime(ts_format)


def _fetch_user_creations(
    instance: SNowInstance, start_ts: str, end_ts: str
) -> List[Dict[str, str]]:
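    """Return one sys_audit record per sys_user row touched in the window.

    Each distinct documentkey (the user's sys_id) is kept once, using its
    earliest audit entry. This treats every audited sys_user row as a
    creation, which assumes user records are only written when created.
    """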
    # Query the audit log directly so deleted users are still counted.
    page_size = 10000  # avoid the default 100-row limit
    offset = 0
    seen: Dict[str, Dict[str, str]] = {}
    while True:
        params = {
            "sysparm_query": f"tablename=sys_user^sys_created_on>={start_ts}^sys_created_on<{end_ts}",
            "sysparm_fields": "documentkey,sys_created_on,user,fieldname,newvalue",
            "sysparm_limit": page_size,
            "sysparm_offset": offset,
        }
        response = table_api_call(instance=instance, table="sys_audit", params=params)
        batch = response.get("result", [])
        for audit in batch:
            doc = audit.get("documentkey")
            if not doc:
                continue
            # Keep the earliest audit entry per user record.
            if doc not in seen or audit.get("sys_created_on", "") < seen[doc].get(
                "sys_created_on", ""
            ):
                seen[doc] = audit
        if len(batch) < page_size:
            break
        offset += page_size
    return list(seen.values())


def _parse_sys_created(ts: str | None) -> datetime | None:
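    """Parse a sys_created_on value into an aware UTC datetime, or None.

    Accepts ISO-8601 strings (a trailing "Z" is mapped to "+00:00") and the
    "%Y-%m-%d %H:%M:%S[.%f]" formats; naive timestamps are assumed UTC.
    """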
    if not ts:
        return None
    ts = ts.replace("Z", "+00:00")
    # Try ISO parsing with timezone if provided
    try:
        dt = datetime.fromisoformat(ts)
    except ValueError:
        dt = None
    if dt is None:
        for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
            try:
                dt = datetime.strptime(ts, fmt)
                break
            except ValueError:
                continue
    if dt is None:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)


def _hourly_counts(records: List[Dict[str, str]]) -> Dict[datetime, int]:
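    """Count records per UTC hour of their sys_created_on timestamp."""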
    buckets: Dict[datetime, int] = defaultdict(int)
    for record in records:
        ts = _parse_sys_created(record.get("sys_created_on"))
        if ts is None:
            continue
        bucket = ts.replace(minute=0, second=0, microsecond=0)
        buckets[bucket] += 1
    return buckets


def _daily_counts(records: List[Dict[str, str]]) -> Dict[datetime, int]:
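    """Count records per UTC day of their sys_created_on timestamp."""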
    buckets: Dict[datetime, int] = defaultdict(int)
    for record in records:
        ts = _parse_sys_created(record.get("sys_created_on"))
        if ts is None:
            continue
        bucket = ts.replace(hour=0, minute=0, second=0, microsecond=0)
        buckets[bucket] += 1
    return buckets


def _init_wandb(instance_name: str | None = None):
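    """Create or resume the W&B run for one instance ("total" for the pool)."""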
    try:
        import wandb
    except ImportError as exc:
        raise SystemExit(
            "wandb is required for W&B logging; install it with 'pip install wandb'."
        ) from exc

    # Use instance name or "total" as the display name
    display_name = instance_name or "total"
    # Add version suffix to run ID to avoid conflicts with deleted runs
    run_id = f"{display_name}-{RUN_VERSION}"

    run = wandb.init(
        project=WANDB_PROJECT,
        entity=WANDB_ENTITY,
        name=display_name,  # Clean name for display
        mode="online",
        id=run_id,  # Versioned ID for persistence
        resume="allow",
        settings=wandb.Settings(init_timeout=180),
        config={
            "hours": 24,
            "instance": display_name,
        },
    )
    return run


def _log_time_series_to_wandb(
    run,
    hourly_data: Dict[datetime, int],
    daily_data: Dict[datetime, int],
):
    """Log time series data to a W&B run, ensuring chronological order."""
    if run is None:
        return

    # Define metrics to allow out-of-order logging based on timestamp
    run.define_metric("daily_tasks_run", step_metric="timestamp", summary="last")
    run.define_metric("hourly_tasks_run", step_metric="timestamp", summary="last")
    run.define_metric("date", step_metric="timestamp")

    # Combine all timestamps and sort them chronologically
    all_data = []

    # Add daily data points
    for bucket, count in daily_data.items():
        all_data.append((bucket, "daily_tasks_run", count))

    # Add hourly data points
    for bucket, count in hourly_data.items():
        all_data.append((bucket, "hourly_tasks_run", count))

    # Sort by timestamp
    all_data.sort(key=lambda x: x[0])

    # Log in chronological order with human-readable date
    for bucket, metric_name, count in all_data:
        run.log(
            {
                "timestamp": int(bucket.timestamp()),
                metric_name: count,
                "date": bucket,  # Pass datetime object directly for W&B to format
            }
        )

    run.finish()


def main():
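    """Collect per-instance creation counts, log them to W&B, and print a summary."""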
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    start_ts, end_ts = _time_window()
    logging.info("Checking user creations between %s and %s (UTC)", start_ts, end_ts)

    instances = fetch_instances()
    if not instances:
        raise SystemExit("No ServiceNow instances available.")

    summaries: List[Tuple[str, int]] = []
    hourly_totals: Dict[datetime, int] = defaultdict(int)
    hourly_per_instance: Dict[str, Dict[datetime, int]] = {}
    daily_totals: Dict[datetime, int] = defaultdict(int)
    daily_per_instance: Dict[str, Dict[datetime, int]] = {}

    # Fetch data from all instances
    for entry in instances:
        url = entry["url"]
        logging.info("Querying %s", url)
        try:
            instance = SNowInstance(snow_url=url, snow_credentials=("admin", entry["password"]))
            creations = _fetch_user_creations(instance=instance, start_ts=start_ts, end_ts=end_ts)
            summaries.append((url, len(creations)))
            hourly = _hourly_counts(creations)
            for bucket, count in hourly.items():
                hourly_totals[bucket] += count
            hourly_per_instance[url] = hourly
            daily = _daily_counts(creations)
            for bucket, count in daily.items():
                daily_totals[bucket] += count
            daily_per_instance[url] = daily
            logging.info("...found %s tasks run", len(creations))
        except Exception:
            logging.exception("Failed to fetch data for %s", url)

    # Log total data to a separate W&B run
    logging.info("Logging total usage to W&B")
    total_run = _init_wandb(instance_name=None)
    _log_time_series_to_wandb(total_run, hourly_totals, daily_totals)

    # Log each instance's data to separate W&B runs
    for url, hourly_data in hourly_per_instance.items():
        instance_name = url.split("//")[-1].replace(".service-now.com", "")
        logging.info("Logging %s usage to W&B", instance_name)

        instance_run = _init_wandb(instance_name=instance_name)
        daily_data = daily_per_instance[url]
        _log_time_series_to_wandb(instance_run, hourly_data, daily_data)

    # Print summary
    total_created = sum(count for _, count in summaries)
    print(f"\nTotal tasks run across instances: {total_created}")

    for url, count in summaries:
        print(f"{url}: {count} task(s) run")

    if daily_totals:
        print("\nDaily task runs (UTC):")
        for bucket in sorted(daily_totals.keys()):
            ts_str = bucket.strftime("%Y-%m-%d")
            print(f"{ts_str}: {daily_totals[bucket]}")

    if hourly_totals:
        print("\nHourly task runs (UTC):")
        for bucket in sorted(hourly_totals.keys()):
            ts_str = bucket.strftime("%Y-%m-%d %H:%M")
            print(f"{ts_str}: {hourly_totals[bucket]}")


if __name__ == "__main__":
    main()