|
12 | 12 | import time |
13 | 13 | import requests |
14 | 14 | import pandas as pd |
| 15 | +from dateutil.relativedelta import relativedelta |
15 | 16 |
|
16 | 17 | # Simplify Firebase initialization |
17 | 18 | if not firebase_admin._apps: |
@@ -285,6 +286,62 @@ def scheduled_full_model_fit(event: scheduler_fn.ScheduledEvent): |
285 | 286 | logging.error(f"Error in full model training workflow: {e}") |
286 | 287 |
|
287 | 288 |
|
| 289 | +@scheduler_fn.on_schedule( |
| 290 | + schedule="0 1 * * 1", # Run at 1 AM every Monday |
| 291 | + timezone="Europe/Zurich", |
| 292 | +) |
| 293 | +def scheduled_cleanup_old_predictions(event: scheduler_fn.ScheduledEvent): |
| 294 | + """ |
| 295 | + Weekly scheduled task to clean up old prediction data. |
| 296 | + |
| 297 | + Deletes prediction documents that have a last_updated timestamp older than one month. |
| 298 | + This helps keep the database size manageable and removes outdated predictions. |
| 299 | + """ |
| 300 | + try: |
| 301 | + # Calculate cutoff date (1 month ago) |
| 302 | + cutoff_date = datetime.now(ZoneInfo("Europe/Zurich")) - relativedelta(months=1) |
| 303 | + logging.info(f"Cleaning up predictions older than: {cutoff_date.isoformat()}") |
| 304 | + |
| 305 | + # Get reference to predictions collection |
| 306 | + predictions_ref = ( |
| 307 | + db.collection("freespace_data") |
| 308 | + .document("Hallenbad_City") |
| 309 | + .collection("predictions") |
| 310 | + ) |
| 311 | + |
| 312 | + # Get all prediction documents |
| 313 | + docs = predictions_ref.stream() |
| 314 | + deleted_count = 0 |
| 315 | + |
| 316 | + for doc in docs: |
| 317 | + doc_data = doc.to_dict() |
| 318 | + # Check if last_updated exists and is older than cutoff date |
| 319 | + if "last_updated" in doc_data: |
| 320 | + last_updated = doc_data["last_updated"] |
| 321 | + |
| 322 | + # Convert to datetime if it's a timestamp |
| 323 | + if not isinstance(last_updated, datetime): |
| 324 | + try: |
| 325 | + last_updated = datetime.fromisoformat(str(last_updated)) |
| 326 | + except (ValueError, TypeError): |
| 327 | + logging.warning(f"Invalid timestamp format in document: {doc.id}") |
| 328 | + continue |
| 329 | + |
| 330 | + # Add timezone info if missing |
| 331 | + if last_updated.tzinfo is None: |
| 332 | + last_updated = last_updated.replace(tzinfo=ZoneInfo("Europe/Zurich")) |
| 333 | + |
| 334 | + # Delete if older than cutoff date |
| 335 | + if last_updated < cutoff_date: |
| 336 | + doc.reference.delete() |
| 337 | + deleted_count += 1 |
| 338 | + |
| 339 | + logging.info(f"Deleted {deleted_count} outdated prediction documents") |
| 340 | + |
| 341 | + except Exception as e: |
| 342 | + logging.error(f"Error cleaning up old predictions: {e}") |
| 343 | + |
| 344 | + |
288 | 345 | if __name__ == "__main__": |
289 | 346 | # For local testing |
290 | 347 | freespace = fetch_freespace() |
|
0 commit comments