Skip to content

Commit 24b9b1d

Browse files
committed
Update analysis
1 parent 88bdd63 commit 24b9b1d

File tree

2 files changed

+103
-96
lines changed

2 files changed

+103
-96
lines changed

tools/analytics/analyze_runner_usage.py

Lines changed: 5 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
import yaml
6868
from dotenv import load_dotenv
6969

70+
from tools.analytics.cache_manager import CACHE_DIR, CacheManager
71+
7072

7173
load_dotenv()
7274

@@ -136,7 +138,7 @@
136138

137139
# Runner labels that carry no useful scheduling information and should be
# skipped when attributing workflow jobs to runner types.
USELESS_RUNNER_LABELS = [
    "self-hosted",  # really, a useless label we want to ignore
    "linux.g5.4xlarge.nvidia.cpu",  # a nonexistent label used by a repo
]
141143

142144
HEADERS = {
@@ -147,99 +149,6 @@
147149
BASE_URL = "https://api.github.com"
148150
WORKFLOW_RUN_LOOKBACK = (datetime.utcnow() - timedelta(days=180)).isoformat() + "Z"
149151

150-
# Cache configuration
151-
CACHE_DIR = Path("cache")
152-
CACHE_DIR.mkdir(exist_ok=True)
153-
154-
155-
class CacheManager:
156-
"""Manages caching of GitHub API responses using URL as cache key."""
157-
158-
def __init__(self, cache_dir: Path = CACHE_DIR):
159-
self.cache_dir = cache_dir
160-
self.cache_dir.mkdir(exist_ok=True)
161-
162-
def _get_cache_key(self, url: str) -> str:
163-
"""Generate a human-readable cache key from URL."""
164-
import re
165-
from urllib.parse import parse_qs, urlencode, urlparse
166-
167-
# Parse the URL to separate path and query parameters
168-
parsed = urlparse(url)
169-
path = parsed.path
170-
query_params = parse_qs(parsed.query)
171-
172-
# Remove the 'created' parameter from query params to avoid cache invalidation
173-
if "created" in query_params:
174-
del query_params["created"]
175-
176-
# Reconstruct the query string without the 'created' parameter
177-
if query_params:
178-
# Flatten single-item lists (parse_qs returns lists)
179-
flat_params = {}
180-
for key, values in query_params.items():
181-
flat_params[key] = values[0] if len(values) == 1 else values
182-
query_string = urlencode(flat_params)
183-
# Reconstruct URL without the 'created' parameter
184-
url_without_created = (
185-
f"{parsed.scheme}://{parsed.netloc}{path}?{query_string}"
186-
)
187-
else:
188-
# If no query params remain, use the original URL
189-
url_without_created = url
190-
191-
# Replace forward slashes with underscores
192-
key = url_without_created.replace("/", "_")
193-
194-
# Remove protocol and domain
195-
key = key.replace("https___api.github.com_", "")
196-
197-
# Handle illegal filename characters in query parameters
198-
# Replace characters that are problematic in filenames
199-
key = re.sub(r'[<>:"|?*]', "_", key)
200-
201-
# Replace equals signs and ampersands in query params with underscores
202-
key = key.replace("=", "_").replace("&", "_")
203-
204-
# Clean up multiple consecutive underscores
205-
key = re.sub(r"_+", "_", key)
206-
207-
# Remove trailing underscore
208-
key = key.rstrip("_")
209-
210-
return key
211-
212-
def _get_cache_path(self, url: str) -> Path:
213-
"""Get the cache file path for a given URL."""
214-
cache_key = self._get_cache_key(url)
215-
return self.cache_dir / f"{cache_key}.json"
216-
217-
def get(self, url: str) -> Optional[Dict]:
218-
"""Retrieve cached response for a URL."""
219-
cache_path = self._get_cache_path(url)
220-
if cache_path.exists():
221-
try:
222-
with open(cache_path, "r") as f:
223-
cached_data = json.load(f)
224-
logging.debug(f"[CacheManager] Cache hit for URL: {url}")
225-
return cached_data
226-
except (json.JSONDecodeError, IOError) as e:
227-
logging.warning(f"[CacheManager] Failed to read cache for {url}: {e}")
228-
return None
229-
logging.debug(f"[CacheManager] Cache miss for URL: {url}")
230-
return None
231-
232-
def set(self, url: str, data: Dict) -> None:
233-
"""Cache response data for a URL."""
234-
cache_path = self._get_cache_path(url)
235-
try:
236-
with open(cache_path, "w") as f:
237-
json.dump(data, f, indent=2)
238-
logging.debug(f"[CacheManager] Cached response for URL: {url}")
239-
except IOError as e:
240-
logging.error(f"[CacheManager] Failed to write cache for {url}: {e}")
241-
242-
243152
# Global cache manager instance
244153
cache_manager = CacheManager()
245154

@@ -704,8 +613,8 @@ def deep_sort(obj, sort_keys=True):
704613
return {k: deep_sort(obj[k]) for k in keys}
705614
elif isinstance(obj, list):
706615
# If list of dicts with 'repo' key, sort by 'repo', else sort normally
707-
if obj and isinstance(obj[0], dict) and 'repo' in obj[0]:
708-
return sorted([deep_sort(x) for x in obj], key=lambda x: x['repo'])
616+
if obj and isinstance(obj[0], dict) and "repo" in obj[0]:
617+
return sorted([deep_sort(x) for x in obj], key=lambda x: x["repo"])
709618
return sorted(deep_sort(x) for x in obj)
710619
else:
711620
return obj

tools/analytics/cache_manager.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import json
2+
import logging
3+
import re
4+
from pathlib import Path
5+
from typing import Dict, Optional
6+
7+
8+
# Cache configuration
9+
CACHE_DIR = Path("cache")
10+
11+
12+
class CacheManager:
13+
"""Manages caching of GitHub API responses using URL as cache key."""
14+
15+
def __init__(self, cache_dir: Path = CACHE_DIR):
16+
CACHE_DIR.mkdir(exist_ok=True)
17+
18+
self.cache_dir = cache_dir
19+
self.cache_dir.mkdir(exist_ok=True)
20+
21+
def _get_cache_key(self, url: str) -> str:
22+
"""Generate a human-readable cache key from URL."""
23+
from urllib.parse import parse_qs, urlencode, urlparse
24+
25+
# Parse the URL to separate path and query parameters
26+
parsed = urlparse(url)
27+
path = parsed.path
28+
query_params = parse_qs(parsed.query)
29+
30+
# Remove the 'created' parameter from query params to avoid cache invalidation
31+
if "created" in query_params:
32+
del query_params["created"]
33+
34+
# Reconstruct the query string without the 'created' parameter
35+
if query_params:
36+
# Flatten single-item lists (parse_qs returns lists)
37+
flat_params = {}
38+
for key, values in query_params.items():
39+
flat_params[key] = values[0] if len(values) == 1 else values
40+
query_string = urlencode(flat_params)
41+
# Reconstruct URL without the 'created' parameter
42+
url_without_created = (
43+
f"{parsed.scheme}://{parsed.netloc}{path}?{query_string}"
44+
)
45+
else:
46+
# If no query params remain, use the original URL
47+
url_without_created = url
48+
49+
# Replace forward slashes with underscores
50+
key = url_without_created.replace("/", "_")
51+
52+
# Remove protocol and domain
53+
key = key.replace("https___api.github.com_", "")
54+
55+
# Handle illegal filename characters in query parameters
56+
# Replace characters that are problematic in filenames
57+
key = re.sub(r'[<>:"|?*]', "_", key)
58+
59+
# Replace equals signs and ampersands in query params with underscores
60+
key = key.replace("=", "_").replace("&", "_")
61+
62+
# Clean up multiple consecutive underscores
63+
key = re.sub(r"_+", "_", key)
64+
65+
# Remove trailing underscore
66+
key = key.rstrip("_")
67+
68+
return key
69+
70+
def _get_cache_path(self, url: str) -> Path:
71+
"""Get the cache file path for a given URL."""
72+
cache_key = self._get_cache_key(url)
73+
return self.cache_dir / f"{cache_key}.json"
74+
75+
def get(self, url: str) -> Optional[Dict]:
76+
"""Retrieve cached response for a URL."""
77+
cache_path = self._get_cache_path(url)
78+
if cache_path.exists():
79+
try:
80+
with open(cache_path, "r") as f:
81+
cached_data = json.load(f)
82+
logging.debug(f"[CacheManager] Cache hit for URL: {url}")
83+
return cached_data
84+
except (json.JSONDecodeError, IOError) as e:
85+
logging.warning(f"[CacheManager] Failed to read cache for {url}: {e}")
86+
return None
87+
logging.debug(f"[CacheManager] Cache miss for URL: {url}")
88+
return None
89+
90+
def set(self, url: str, data: Dict) -> None:
91+
"""Cache response data for a URL."""
92+
cache_path = self._get_cache_path(url)
93+
try:
94+
with open(cache_path, "w") as f:
95+
json.dump(data, f, indent=2)
96+
logging.debug(f"[CacheManager] Cached response for URL: {url}")
97+
except IOError as e:
98+
logging.error(f"[CacheManager] Failed to write cache for {url}: {e}")

0 commit comments

Comments
 (0)