Skip to content

Commit 58edc51

Browse files
committed
feat: add weekly scheduled cleanup for old prediction data in Firestore
1 parent f8fe149 commit 58edc51

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

firestore.rules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ service cloud.firestore {
1414

1515
match /{document=**} {
1616
allow read: if true;
17-
allow write: if isServiceAccount() || isLocalDevelopment();
17+
allow create, update, delete: if isServiceAccount() || isLocalDevelopment();
1818
}
1919
}
2020
}

functions/main.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import time
1313
import requests
1414
import pandas as pd
15+
from dateutil.relativedelta import relativedelta
1516

1617
# Simplify Firebase initialization
1718
if not firebase_admin._apps:
@@ -285,6 +286,62 @@ def scheduled_full_model_fit(event: scheduler_fn.ScheduledEvent):
285286
logging.error(f"Error in full model training workflow: {e}")
286287

287288

289+
@scheduler_fn.on_schedule(
    schedule="0 1 * * 1",  # Run at 1 AM every Monday
    timezone="Europe/Zurich",
)
def scheduled_cleanup_old_predictions(event: scheduler_fn.ScheduledEvent):
    """
    Weekly scheduled job that prunes stale prediction documents.

    Removes every prediction document whose ``last_updated`` timestamp is
    more than one month old, keeping the Firestore database size manageable
    and free of outdated predictions.
    """
    tz = ZoneInfo("Europe/Zurich")

    def _normalize_timestamp(raw):
        # Coerce a stored value into a tz-aware datetime; None means unparseable.
        if isinstance(raw, datetime):
            stamp = raw
        else:
            try:
                stamp = datetime.fromisoformat(str(raw))
            except (ValueError, TypeError):
                return None
        # Naive values are presumed to be local (Zurich) time — matches how
        # the cutoff is computed above. TODO confirm against the writer side.
        return stamp if stamp.tzinfo is not None else stamp.replace(tzinfo=tz)

    try:
        # Anything last touched before this moment is eligible for deletion.
        cutoff_date = datetime.now(tz) - relativedelta(months=1)
        logging.info(f"Cleaning up predictions older than: {cutoff_date.isoformat()}")

        predictions = (
            db.collection("freespace_data")
            .document("Hallenbad_City")
            .collection("predictions")
        )

        removed = 0
        # Full collection scan: documents may store last_updated in mixed
        # formats, so filtering happens client-side rather than via a query.
        for snapshot in predictions.stream():
            fields = snapshot.to_dict()
            if "last_updated" not in fields:
                continue

            stamp = _normalize_timestamp(fields["last_updated"])
            if stamp is None:
                logging.warning(f"Invalid timestamp format in document: {snapshot.id}")
                continue

            if stamp < cutoff_date:
                snapshot.reference.delete()
                removed += 1

        deleted_count = removed
        logging.info(f"Deleted {deleted_count} outdated prediction documents")

    except Exception as e:
        # Boundary handler: a scheduled function should log, not crash the runtime.
        logging.error(f"Error cleaning up old predictions: {e}")
343+
344+
288345
if __name__ == "__main__":
289346
# For local testing
290347
freespace = fetch_freespace()

0 commit comments

Comments
 (0)