
Commit abdf285

aldro61 and claude committed
Add daily pool usage monitoring with W&B integration
- Add monitor_pool_usage.py script to track task runs across instances
- Log hourly and daily usage metrics to Weights & Biases
- Create separate W&B runs for each instance and total usage
- Add GitHub Actions workflow for daily automated monitoring
- Update requirements.txt to include wandb dependency

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <[email protected]>
1 parent 6d4194f commit abdf285
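
The helpers this commit adds can also be exercised outside the scheduled workflow; a minimal sketch for spot-checking a single instance (assuming monitor_pool_usage.py is importable and the pool credentials are configured):

# Sketch: count one instance's recent task runs without logging to W&B.
from monitor_pool_usage import _daily_counts, _fetch_user_creations, _time_window
from browsergym.workarena.instance import SNowInstance, fetch_instances

start_ts, end_ts = _time_window(hours=24)
entry = fetch_instances()[0]  # first pooled instance
instance = SNowInstance(snow_url=entry["url"], snow_credentials=("admin", entry["password"]))
records = _fetch_user_creations(instance=instance, start_ts=start_ts, end_ts=end_ts)
for bucket, count in sorted(_daily_counts(records).items()):
    print(bucket.date(), count)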

3 files changed: +276 −0 lines changed
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
name: Monitor Pool Usage

on:
  schedule:
    # Run daily at 00:00 UTC
    - cron: '0 0 * * *'
  workflow_dispatch: # Allow manual trigger

jobs:
  monitor:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Run monitoring script
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
        run: |
          python monitor_pool_usage.py
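
Besides the daily cron trigger, workflow_dispatch allows manual runs from the Actions tab. For local debugging, the same entry point can be invoked directly; a sketch, assuming WANDB_API_KEY is exported in the environment:

# Sketch: run the monitor once locally instead of waiting for the cron trigger.
import os

assert "WANDB_API_KEY" in os.environ, "export WANDB_API_KEY before running"

import monitor_pool_usage

monitor_pool_usage.main()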

monitor_pool_usage.py

Lines changed: 245 additions & 0 deletions
@@ -0,0 +1,245 @@
#!/usr/bin/env python
"""
Count how many users were created recently on each ServiceNow instance in the pool.

This reuses the instance loader and table API helper from the codebase.
"""

import logging
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Tuple

from browsergym.workarena.api.utils import table_api_call
from browsergym.workarena.instance import SNowInstance, fetch_instances

WANDB_ENTITY = "alexdrouin"
WANDB_PROJECT = "workarena-monitoring"
RUN_VERSION = "v2"  # Increment if you need to recreate runs after deletion


def _time_window(hours: int = 24) -> Tuple[str, str]:
    end = datetime.now(timezone.utc)
    start = end - timedelta(hours=hours)
    ts_format = "%Y-%m-%d %H:%M:%S"
    return start.strftime(ts_format), end.strftime(ts_format)


def _fetch_user_creations(
    instance: SNowInstance, start_ts: str, end_ts: str
) -> List[Dict[str, str]]:
    # Query the audit log directly so deleted users are still counted.
    page_size = 10000  # avoid the default 100-row limit
    offset = 0
    seen: Dict[str, Dict[str, str]] = {}
    while True:
        params = {
            "sysparm_query": f"tablename=sys_user^sys_created_on>={start_ts}^sys_created_on<{end_ts}",
            "sysparm_fields": "documentkey,sys_created_on,user,fieldname,newvalue",
            "sysparm_limit": page_size,
            "sysparm_offset": offset,
        }
        response = table_api_call(instance=instance, table="sys_audit", params=params)
        batch = response.get("result", [])
        for audit in batch:
            doc = audit.get("documentkey")
            if not doc:
                continue
            # Keep the earliest audit entry per user record.
            if doc not in seen or audit.get("sys_created_on", "") < seen[doc].get("sys_created_on", ""):
                seen[doc] = audit
        if len(batch) < page_size:
            break
        offset += page_size
    return list(seen.values())


def _parse_sys_created(ts: str | None) -> datetime | None:
    if not ts:
        return None
    ts = ts.replace("Z", "+00:00")
    # Try ISO parsing with timezone if provided
    try:
        dt = datetime.fromisoformat(ts)
    except ValueError:
        dt = None
    if dt is None:
        for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
            try:
                dt = datetime.strptime(ts, fmt)
                break
            except ValueError:
                continue
    if dt is None:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)


def _hourly_counts(records: List[Dict[str, str]]) -> Dict[datetime, int]:
    buckets: Dict[datetime, int] = defaultdict(int)
    for record in records:
        ts = _parse_sys_created(record.get("sys_created_on"))
        if ts is None:
            continue
        bucket = ts.replace(minute=0, second=0, microsecond=0)
        buckets[bucket] += 1
    return buckets


def _daily_counts(records: List[Dict[str, str]]) -> Dict[datetime, int]:
    buckets: Dict[datetime, int] = defaultdict(int)
    for record in records:
        ts = _parse_sys_created(record.get("sys_created_on"))
        if ts is None:
            continue
        bucket = ts.replace(hour=0, minute=0, second=0, microsecond=0)
        buckets[bucket] += 1
    return buckets


def _init_wandb(instance_name: str | None = None):
    try:
        import wandb
    except ImportError as exc:
        raise SystemExit("wandb is required; install it to enable W&B logging.") from exc

    # Use the instance name, or "total" for the pool-wide run
    display_name = instance_name or "total"
    # Add a version suffix to the run ID to avoid conflicts with deleted runs
    run_id = f"{display_name}-{RUN_VERSION}"

    run = wandb.init(
        project=WANDB_PROJECT,
        entity=WANDB_ENTITY,
        name=display_name,  # Clean name for display
        mode="online",
        id=run_id,  # Versioned ID for persistence
        resume="allow",
        settings=wandb.Settings(init_timeout=180),
        config={
            "hours": 24,
            "instance": display_name,
        },
    )
    return run


def _log_time_series_to_wandb(
    run,
    hourly_data: Dict[datetime, int],
    daily_data: Dict[datetime, int],
):
    """Log time series data to a W&B run, ensuring chronological order."""
    if run is None:
        return

    # Define metrics to allow out-of-order logging based on timestamp
    run.define_metric("daily_tasks_run", step_metric="timestamp", summary="last")
    run.define_metric("hourly_tasks_run", step_metric="timestamp", summary="last")
    run.define_metric("date", step_metric="timestamp")

    # Combine all timestamps and sort them chronologically
    all_data = []

    # Add daily data points
    for bucket, count in daily_data.items():
        all_data.append((bucket, "daily_tasks_run", count))

    # Add hourly data points
    for bucket, count in hourly_data.items():
        all_data.append((bucket, "hourly_tasks_run", count))

    # Sort by timestamp
    all_data.sort(key=lambda x: x[0])

    # Log in chronological order with a human-readable date
    for bucket, metric_name, count in all_data:
        run.log({
            "timestamp": int(bucket.timestamp()),
            metric_name: count,
            "date": bucket,  # Pass the datetime object directly for W&B to format
        })

    run.finish()


def main():
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    start_ts, end_ts = _time_window()
    logging.info("Checking user creations between %s and %s (UTC)", start_ts, end_ts)

    instances = fetch_instances()
    if not instances:
        raise SystemExit("No ServiceNow instances available.")

    summaries: List[Tuple[str, int]] = []
    hourly_totals: Dict[datetime, int] = defaultdict(int)
    hourly_per_instance: Dict[str, Dict[datetime, int]] = {}
    daily_totals: Dict[datetime, int] = defaultdict(int)
    daily_per_instance: Dict[str, Dict[datetime, int]] = {}

    # Fetch data from all instances
    for entry in instances:
        url = entry["url"]
        logging.info("Querying %s", url)
        try:
            instance = SNowInstance(
                snow_url=url, snow_credentials=("admin", entry["password"])
            )
            creations = _fetch_user_creations(
                instance=instance, start_ts=start_ts, end_ts=end_ts
            )
            summaries.append((url, len(creations)))
            hourly = _hourly_counts(creations)
            for bucket, count in hourly.items():
                hourly_totals[bucket] += count
            hourly_per_instance[url] = hourly
            daily = _daily_counts(creations)
            for bucket, count in daily.items():
                daily_totals[bucket] += count
            daily_per_instance[url] = daily
            logging.info("...found %s tasks run", len(creations))
        except Exception:
            logging.exception("Failed to fetch data for %s", url)

    # Log total data to a separate W&B run
    logging.info("Logging total usage to W&B")
    total_run = _init_wandb(instance_name=None)
    _log_time_series_to_wandb(total_run, hourly_totals, daily_totals)

    # Log each instance's data to its own W&B run
    for url, hourly_data in hourly_per_instance.items():
        instance_name = url.split("//")[-1].replace(".service-now.com", "")
        logging.info(f"Logging {instance_name} usage to W&B")

        instance_run = _init_wandb(instance_name=instance_name)
        daily_data = daily_per_instance[url]
        _log_time_series_to_wandb(instance_run, hourly_data, daily_data)

    # Print summary
    total_created = sum(count for _, count in summaries)
    print(f"\nTotal tasks run across instances: {total_created}")

    for url, count in summaries:
        print(f"{url}: {count} task(s) run")

    if daily_totals:
        print("\nDaily task runs (UTC):")
        for bucket in sorted(daily_totals.keys()):
            ts_str = bucket.strftime("%Y-%m-%d")
            print(f"{ts_str}: {daily_totals[bucket]}")

    if hourly_totals:
        print("\nHourly task runs (UTC):")
        for bucket in sorted(hourly_totals.keys()):
            ts_str = bucket.strftime("%Y-%m-%d %H:%M")
            print(f"{ts_str}: {hourly_totals[bucket]}")


if __name__ == "__main__":
    main()
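
The parser above accepts both ServiceNow's space-separated timestamps and ISO 8601 with a Z suffix, normalizing everything to UTC before bucketing; a small illustration with made-up timestamps (hypothetical inputs, not real audit rows):

# Illustration: parsing and hourly bucketing with synthetic timestamps.
from monitor_pool_usage import _hourly_counts, _parse_sys_created

rows = [
    {"sys_created_on": "2024-05-01 12:03:45"},      # ServiceNow format, naive -> assumed UTC
    {"sys_created_on": "2024-05-01T12:59:59Z"},     # ISO 8601 with Zulu suffix
    {"sys_created_on": "2024-05-01 13:00:00.123"},  # fractional seconds
]
print(_parse_sys_created(rows[0]["sys_created_on"]))  # 2024-05-01 12:03:45+00:00
print(_hourly_counts(rows))  # two creations in the 12:00 bucket, one in 13:00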

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -6,3 +6,4 @@ requests>=2.31
 tenacity>=8.2.3  # only used in cheat() -> move to tests?
 tqdm>=4.66.2
 huggingface_hub>=0.23
+wandb>=0.16
