Commit 743bdff

Merge pull request #121 from ServiceNow/pool-monitoring
Add daily pool usage monitoring with W&B integration
2 parents 9c44c0c + d5c7196 commit 743bdff

File tree

2 files changed: +278 −0 lines changed

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
name: Monitor Pool Usage

on:
  schedule:
    # Run daily at 00:00 UTC
    - cron: '0 0 * * *'
  workflow_dispatch: # Allow manual trigger

jobs:
  monitor:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install "wandb>=0.16"

      - name: Run monitoring script
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
        run: |
          python monitor_pool_usage.py
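
Both triggers run the same monitor job; at runtime the script needs only the WANDB_API_KEY repository secret, which the final step exports into the environment for the wandb client to pick up.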

monitor_pool_usage.py

Lines changed: 247 additions & 0 deletions
@@ -0,0 +1,247 @@
#!/usr/bin/env python
"""
Count how many users were created recently on each ServiceNow instance in the pool.

This reuses the instance loader and table API helper from the codebase.
"""

import logging
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Tuple

from browsergym.workarena.api.utils import table_api_call
from browsergym.workarena.instance import SNowInstance, fetch_instances

WANDB_ENTITY = "alexdrouin"
WANDB_PROJECT = "workarena-monitoring"
RUN_VERSION = "v2"  # Increment if you need to recreate runs after deletion


def _time_window(hours: int = 24) -> Tuple[str, str]:
    end = datetime.now(timezone.utc)
    start = end - timedelta(hours=hours)
    ts_format = "%Y-%m-%d %H:%M:%S"
    return start.strftime(ts_format), end.strftime(ts_format)
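
# Example (hypothetical values): calling _time_window(24) at 2024-06-02 00:00 UTC
# yields ("2024-06-01 00:00:00", "2024-06-02 00:00:00"). ServiceNow stores
# sys_created_on in UTC in this same format, so the strings compare correctly
# inside a sysparm_query.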


def _fetch_user_creations(
    instance: SNowInstance, start_ts: str, end_ts: str
) -> List[Dict[str, str]]:
    # Query the audit log directly so deleted users are still counted.
    page_size = 10000  # avoid the default 100-row limit
    offset = 0
    seen: Dict[str, Dict[str, str]] = {}
    while True:
        params = {
            "sysparm_query": f"tablename=sys_user^sys_created_on>={start_ts}^sys_created_on<{end_ts}",
            "sysparm_fields": "documentkey,sys_created_on,user,fieldname,newvalue",
            "sysparm_limit": page_size,
            "sysparm_offset": offset,
        }
        response = table_api_call(instance=instance, table="sys_audit", params=params)
        batch = response.get("result", [])
        for audit in batch:
            doc = audit.get("documentkey")
            if not doc:
                continue
            # Keep the earliest audit entry per user record.
            if doc not in seen or audit.get("sys_created_on", "") < seen[doc].get(
                "sys_created_on", ""
            ):
                seen[doc] = audit
        if len(batch) < page_size:
            break
        offset += page_size
    return list(seen.values())
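
# The sysparm_limit/sysparm_offset loop pages through sys_audit because the
# Table API caps result sets; a batch shorter than page_size marks the last
# page. Deduplicating on documentkey collapses the several audit rows that can
# be written per user insert (one per audited field) into a single creation event.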


def _parse_sys_created(ts: str | None) -> datetime | None:
    if not ts:
        return None
    ts = ts.replace("Z", "+00:00")
    # Try ISO parsing with timezone if provided
    try:
        dt = datetime.fromisoformat(ts)
    except ValueError:
        dt = None
    if dt is None:
        for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
            try:
                dt = datetime.strptime(ts, fmt)
                break
            except ValueError:
                continue
    if dt is None:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)
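
# Accepts both ISO-8601 strings (e.g. "2024-06-01T12:34:56Z") and the plain
# "%Y-%m-%d %H:%M:%S[.%f]" format used by ServiceNow, always returning a
# timezone-aware UTC datetime (naive inputs are assumed to be UTC).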


def _hourly_counts(records: List[Dict[str, str]]) -> Dict[datetime, int]:
    buckets: Dict[datetime, int] = defaultdict(int)
    for record in records:
        ts = _parse_sys_created(record.get("sys_created_on"))
        if ts is None:
            continue
        bucket = ts.replace(minute=0, second=0, microsecond=0)
        buckets[bucket] += 1
    return buckets


def _daily_counts(records: List[Dict[str, str]]) -> Dict[datetime, int]:
    buckets: Dict[datetime, int] = defaultdict(int)
    for record in records:
        ts = _parse_sys_created(record.get("sys_created_on"))
        if ts is None:
            continue
        bucket = ts.replace(hour=0, minute=0, second=0, microsecond=0)
        buckets[bucket] += 1
    return buckets
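
# Example (hypothetical records): creations at 09:15 and 09:47 UTC on the same
# day fall into one hourly bucket (09:00) and one daily bucket (00:00), so the
# two dicts expose the same data at two resolutions.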


def _init_wandb(instance_name: str | None = None):
    try:
        import wandb
    except ImportError as exc:
        raise SystemExit(
            "wandb is required for logging; install it to enable W&B logging."
        ) from exc

    # Use instance name or "total" as the display name
    display_name = instance_name or "total"
    # Add version suffix to run ID to avoid conflicts with deleted runs
    run_id = f"{display_name}-{RUN_VERSION}"

    run = wandb.init(
        project=WANDB_PROJECT,
        entity=WANDB_ENTITY,
        name=display_name,  # Clean name for display
        mode="online",
        id=run_id,  # Versioned ID for persistence
        resume="allow",
        settings=wandb.Settings(init_timeout=180),
        config={
            "hours": 24,
            "instance": display_name,
        },
    )
    return run
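
# Note: the run *name* stays human-readable while the run *id* carries the
# RUN_VERSION suffix; combined with resume="allow", each daily invocation
# resumes the same W&B run instead of creating a new one.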


def _log_time_series_to_wandb(
    run,
    hourly_data: Dict[datetime, int],
    daily_data: Dict[datetime, int],
):
    """Log time series data to a W&B run, ensuring chronological order."""
    if run is None:
        return

    # Define metrics to allow out-of-order logging based on timestamp
    run.define_metric("daily_tasks_run", step_metric="timestamp", summary="last")
    run.define_metric("hourly_tasks_run", step_metric="timestamp", summary="last")
    run.define_metric("date", step_metric="timestamp")

    # Combine all timestamps and sort them chronologically
    all_data = []

    # Add daily data points
    for bucket, count in daily_data.items():
        all_data.append((bucket, "daily_tasks_run", count))

    # Add hourly data points
    for bucket, count in hourly_data.items():
        all_data.append((bucket, "hourly_tasks_run", count))

    # Sort by timestamp
    all_data.sort(key=lambda x: x[0])

    # Log in chronological order with human-readable date
    for bucket, metric_name, count in all_data:
        run.log(
            {
                "timestamp": int(bucket.timestamp()),
                metric_name: count,
                "date": bucket,  # Pass datetime object directly for W&B to format
            }
        )

    run.finish()
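
# define_metric(..., step_metric="timestamp") makes W&B chart these series
# against the epoch timestamp instead of the default internal step, which is
# what lets historical hourly/daily buckets land at the right x-positions.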


def main():
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    start_ts, end_ts = _time_window()
    logging.info("Checking user creations between %s and %s (UTC)", start_ts, end_ts)

    instances = fetch_instances()
    if not instances:
        raise SystemExit("No ServiceNow instances available.")

    summaries: List[Tuple[str, int]] = []
    hourly_totals: Dict[datetime, int] = defaultdict(int)
    hourly_per_instance: Dict[str, Dict[datetime, int]] = {}
    daily_totals: Dict[datetime, int] = defaultdict(int)
    daily_per_instance: Dict[str, Dict[datetime, int]] = {}

    # Fetch data from all instances
    for entry in instances:
        url = entry["url"]
        logging.info("Querying %s", url)
        try:
            instance = SNowInstance(snow_url=url, snow_credentials=("admin", entry["password"]))
            creations = _fetch_user_creations(instance=instance, start_ts=start_ts, end_ts=end_ts)
            summaries.append((url, len(creations)))
            hourly = _hourly_counts(creations)
            for bucket, count in hourly.items():
                hourly_totals[bucket] += count
            hourly_per_instance[url] = hourly
            daily = _daily_counts(creations)
            for bucket, count in daily.items():
                daily_totals[bucket] += count
            daily_per_instance[url] = daily
            logging.info("...found %s tasks run", len(creations))
        except Exception:
            logging.exception("Failed to fetch data for %s", url)

    # Log total data to a separate W&B run
    logging.info("Logging total usage to W&B")
    total_run = _init_wandb(instance_name=None)
    _log_time_series_to_wandb(total_run, hourly_totals, daily_totals)

    # Log each instance's data to separate W&B runs
    for url, hourly_data in hourly_per_instance.items():
        instance_name = url.split("//")[-1].replace(".service-now.com", "")
        logging.info("Logging %s usage to W&B", instance_name)

        instance_run = _init_wandb(instance_name=instance_name)
        daily_data = daily_per_instance[url]
        _log_time_series_to_wandb(instance_run, hourly_data, daily_data)

    # Print summary
    total_created = sum(count for _, count in summaries)
    print(f"\nTotal tasks run across instances: {total_created}")

    for url, count in summaries:
        print(f"{url}: {count} task(s) run")

    if daily_totals:
        print("\nDaily task runs (UTC):")
        for bucket in sorted(daily_totals.keys()):
            ts_str = bucket.strftime("%Y-%m-%d")
            print(f"{ts_str}: {daily_totals[bucket]}")

    if hourly_totals:
        print("\nHourly task runs (UTC):")
        for bucket in sorted(hourly_totals.keys()):
            ts_str = bucket.strftime("%Y-%m-%d %H:%M")
            print(f"{ts_str}: {hourly_totals[bucket]}")


if __name__ == "__main__":
    main()
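
For ad-hoc checks, the helpers compose without W&B. A minimal sketch, assuming a reachable instance; the URL and credentials below are placeholders, not part of this commit:

# check_one_instance.py (hypothetical helper script)
from monitor_pool_usage import _fetch_user_creations, _hourly_counts, _time_window
from browsergym.workarena.instance import SNowInstance

start_ts, end_ts = _time_window(hours=24)
instance = SNowInstance(
    snow_url="https://example.service-now.com",  # placeholder
    snow_credentials=("admin", "password"),  # placeholder
)
creations = _fetch_user_creations(instance=instance, start_ts=start_ts, end_ts=end_ts)
print(f"{len(creations)} user creation(s) between {start_ts} and {end_ts}")
for bucket, count in sorted(_hourly_counts(creations).items()):
    print(bucket.strftime("%Y-%m-%d %H:%M"), count)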

0 commit comments
