 67 |  67 | import yaml
 68 |  68 | from dotenv import load_dotenv
 69 |  69 |
    |  70 | +from tools.analytics.cache_manager import CACHE_DIR, CacheManager
    |  71 | +
 70 |  72 |
 71 |  73 | load_dotenv()
 72 |  74 |

136 | 138 |
137 | 139 | USELESS_RUNNER_LABELS = [
138 | 140 |     "self-hosted",  # really, a useless label we want to ignore
139 |     | -    "linux.g5.4xlarge.nvidia.cpu",  # a nonexistent label used by a repo
    | 141 | +    "linux.g5.4xlarge.nvidia.cpu",  # a nonexistent label used by a repo
140 | 142 | ]
141 | 143 |
142 | 144 | HEADERS = {

147 | 149 | BASE_URL = "https://api.github.com"
148 | 150 | WORKFLOW_RUN_LOOKBACK = (datetime.utcnow() - timedelta(days=180)).isoformat() + "Z"
149 | 151 |
150 |     | -# Cache configuration
151 |     | -CACHE_DIR = Path("cache")
152 |     | -CACHE_DIR.mkdir(exist_ok=True)
153 |     | -
154 |     | -
155 |     | -class CacheManager:
156 |     | -    """Manages caching of GitHub API responses using URL as cache key."""
157 |     | -
158 |     | -    def __init__(self, cache_dir: Path = CACHE_DIR):
159 |     | -        self.cache_dir = cache_dir
160 |     | -        self.cache_dir.mkdir(exist_ok=True)
161 |     | -
162 |     | -    def _get_cache_key(self, url: str) -> str:
163 |     | -        """Generate a human-readable cache key from URL."""
164 |     | -        import re
165 |     | -        from urllib.parse import parse_qs, urlencode, urlparse
166 |     | -
167 |     | -        # Parse the URL to separate path and query parameters
168 |     | -        parsed = urlparse(url)
169 |     | -        path = parsed.path
170 |     | -        query_params = parse_qs(parsed.query)
171 |     | -
172 |     | -        # Remove the 'created' parameter from query params to avoid cache invalidation
173 |     | -        if "created" in query_params:
174 |     | -            del query_params["created"]
175 |     | -
176 |     | -        # Reconstruct the query string without the 'created' parameter
177 |     | -        if query_params:
178 |     | -            # Flatten single-item lists (parse_qs returns lists)
179 |     | -            flat_params = {}
180 |     | -            for key, values in query_params.items():
181 |     | -                flat_params[key] = values[0] if len(values) == 1 else values
182 |     | -            query_string = urlencode(flat_params)
183 |     | -            # Reconstruct URL without the 'created' parameter
184 |     | -            url_without_created = (
185 |     | -                f"{parsed.scheme}://{parsed.netloc}{path}?{query_string}"
186 |     | -            )
187 |     | -        else:
188 |     | -            # If no query params remain, use the original URL
189 |     | -            url_without_created = url
190 |     | -
191 |     | -        # Replace forward slashes with underscores
192 |     | -        key = url_without_created.replace("/", "_")
193 |     | -
194 |     | -        # Remove protocol and domain
195 |     | -        key = key.replace("https___api.github.com_", "")
196 |     | -
197 |     | -        # Handle illegal filename characters in query parameters
198 |     | -        # Replace characters that are problematic in filenames
199 |     | -        key = re.sub(r'[<>:"|?*]', "_", key)
200 |     | -
201 |     | -        # Replace equals signs and ampersands in query params with underscores
202 |     | -        key = key.replace("=", "_").replace("&", "_")
203 |     | -
204 |     | -        # Clean up multiple consecutive underscores
205 |     | -        key = re.sub(r"_+", "_", key)
206 |     | -
207 |     | -        # Remove trailing underscore
208 |     | -        key = key.rstrip("_")
209 |     | -
210 |     | -        return key
211 |     | -
212 |     | -    def _get_cache_path(self, url: str) -> Path:
213 |     | -        """Get the cache file path for a given URL."""
214 |     | -        cache_key = self._get_cache_key(url)
215 |     | -        return self.cache_dir / f"{cache_key}.json"
216 |     | -
217 |     | -    def get(self, url: str) -> Optional[Dict]:
218 |     | -        """Retrieve cached response for a URL."""
219 |     | -        cache_path = self._get_cache_path(url)
220 |     | -        if cache_path.exists():
221 |     | -            try:
222 |     | -                with open(cache_path, "r") as f:
223 |     | -                    cached_data = json.load(f)
224 |     | -                logging.debug(f"[CacheManager] Cache hit for URL: {url}")
225 |     | -                return cached_data
226 |     | -            except (json.JSONDecodeError, IOError) as e:
227 |     | -                logging.warning(f"[CacheManager] Failed to read cache for {url}: {e}")
228 |     | -                return None
229 |     | -        logging.debug(f"[CacheManager] Cache miss for URL: {url}")
230 |     | -        return None
231 |     | -
232 |     | -    def set(self, url: str, data: Dict) -> None:
233 |     | -        """Cache response data for a URL."""
234 |     | -        cache_path = self._get_cache_path(url)
235 |     | -        try:
236 |     | -            with open(cache_path, "w") as f:
237 |     | -                json.dump(data, f, indent=2)
238 |     | -            logging.debug(f"[CacheManager] Cached response for URL: {url}")
239 |     | -        except IOError as e:
240 |     | -            logging.error(f"[CacheManager] Failed to write cache for {url}: {e}")
241 |     | -
242 |     | -
243 | 152 | # Global cache manager instance
244 | 153 | cache_manager = CacheManager()
245 | 154 |
@@ -704,8 +613,8 @@ def deep_sort(obj, sort_keys=True):
704 | 613 |         return {k: deep_sort(obj[k]) for k in keys}
705 | 614 |     elif isinstance(obj, list):
706 | 615 |         # If list of dicts with 'repo' key, sort by 'repo', else sort normally
707 |     | -        if obj and isinstance(obj[0], dict) and 'repo' in obj[0]:
708 |     | -            return sorted([deep_sort(x) for x in obj], key=lambda x: x['repo'])
    | 616 | +        if obj and isinstance(obj[0], dict) and "repo" in obj[0]:
    | 617 | +            return sorted([deep_sort(x) for x in obj], key=lambda x: x["repo"])
709 | 618 |         return sorted(deep_sort(x) for x in obj)
710 | 619 |     else:
711 | 620 |         return obj
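A minimal usage sketch (not part of this diff): how the imported cache_manager could wrap a GitHub API call, assuming the extracted tools.analytics.cache_manager module keeps the same get(url) / set(url, data) interface as the removed inline class. The fetch_with_cache helper and the direct use of requests are illustrative assumptions; cache_manager and HEADERS are the module-level names visible in the diff above.

import requests  # assumed HTTP client; the script's actual fetch helper may differ


def fetch_with_cache(url: str) -> dict:
    """Return cached JSON for `url` if present, else fetch, cache, and return it."""
    cached = cache_manager.get(url)  # cache hit: parsed JSON read back from cache/<key>.json
    if cached is not None:
        return cached
    resp = requests.get(url, headers=HEADERS)  # HEADERS is defined earlier in this file
    resp.raise_for_status()
    data = resp.json()
    cache_manager.set(url, data)  # persist under a URL-derived, filename-safe key
    return data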