Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[flake8]
ignore = E501,E731,W503,E203
ignore = E501,E731,W503,E203,F824
exclude =
.venv
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ __pycache__
# config
config/local.json
.envrc
.poetry

# logs
gobble.log
s3_upload.log

# ides
.cursor
.vscode
1 change: 1 addition & 0 deletions devops/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ echo "Deploying Gobble..."
echo "View stack log here: https://$AWS_REGION.console.aws.amazon.com/cloudformation/home?region=$AWS_REGION"

aws cloudformation deploy --stack-name $STACK_NAME \
--tags service=gobble env=prod \
--template-file cloudformation.json \
--capabilities CAPABILITY_NAMED_IAM \
--no-fail-on-empty-changeset
Expand Down
697 changes: 348 additions & 349 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ python-json-logger = "^2.0.7"
coverage = "^7.6.10"
debugpy = "^1.8.12"

[tool.poetry.dev-dependencies]
[tool.poetry.group.dev.dependencies]
pip = ">=24.0"
flake8 = "^7.1.1"
black = "^24.10.0"
pytest = "~8.3.4"
flake8 = "^7.2.0"
black = "^25.1.0"
pytest = "~8.3.5"

[tool.poetry.requires-plugins]
poetry-plugin-export = "^1.9.0"
Expand Down
26 changes: 26 additions & 0 deletions src/disk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv
import os
import pathlib
from datetime import datetime, timedelta
from util import output_dir_path
from ddtrace import tracer

Expand Down Expand Up @@ -47,3 +48,28 @@ def write_event(event: dict):
if not file_exists:
writer.writeheader()
writer.writerow(event)


def cleanup_old_files():
    """Delete CSV files older than 6 months."""
    logger.info("Starting cleanup of old files")
    # Anything last modified before this instant is eligible for deletion.
    expiry = datetime.now() - timedelta(days=180)
    removed = 0

    def _walk(directory: pathlib.Path):
        # Depth-first recursive scan; errors on a single entry are logged
        # and skipped so one bad file cannot abort the whole cleanup.
        nonlocal removed
        with os.scandir(directory) as entries:
            for item in entries:
                try:
                    if item.is_dir(follow_symlinks=False):
                        _walk(item.path)
                        continue
                    # Only the data CSVs are candidates; ignore everything else.
                    if not (item.is_file(follow_symlinks=False) and item.name == CSV_FILENAME):
                        continue
                    modified = datetime.fromtimestamp(item.stat().st_mtime)
                    if modified < expiry:
                        os.unlink(item.path)
                        removed += 1
                        logger.info(f"Deleted old file: {item.path}")
                except Exception as err:
                    logger.warning(f"Skipping {item.path}: {err}")

    _walk(DATA_DIR)
    logger.info(f"Completed cleanup — deleted {removed} file(s)")
5 changes: 4 additions & 1 deletion src/s3_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import logging

from config import CONFIG
from disk import DATA_DIR
from disk import DATA_DIR, cleanup_old_files
from logger import set_up_logging
from util import EASTERN_TIME, service_date

Expand Down Expand Up @@ -62,6 +62,9 @@ def upload_todays_events_to_s3():
end_time = time.time()
logger.info(f"Uploaded {len(files_updated_today)} files to s3, took {end_time - start_time} seconds.")

# cleanup old files, free up disk space
cleanup_old_files()


if __name__ == "__main__":
logger = set_up_logging(__file__)
Expand Down