Skip to content

Commit da7407b

Browse files
authored
Archive select tables whose entries expire (#266)
1 parent 638e8a8 commit da7407b

File tree

14 files changed

+614
-56
lines changed

14 files changed

+614
-56
lines changed

infracost-usage.yml

Lines changed: 50 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,25 @@
33
# these resources are not free.
44
# See docs: https://infracost.io/usage-file/
55
version: 0.1
6+
resource_usage:
7+
module.frontend.aws_s3_bucket.frontend:
8+
standard: # Usages of S3 Standard:
9+
storage_gb: 1 # Total storage in GB.
10+
monthly_tier_1_requests: 10000 # Monthly PUT, COPY, POST, LIST requests (Tier 1).
11+
monthly_tier_2_requests: 125000 # Monthly GET, SELECT, and all other requests (Tier 2).
12+
monthly_select_data_scanned_gb: 0
13+
monthly_select_data_returned_gb: 0
14+
intelligent_tiering: # Usages of S3 Intelligent - Tiering:
15+
frequent_access_storage_gb: 0 # Total storage for Frequent Access Tier in GB.
16+
infrequent_access_storage_gb: 0 # Total storage for Infrequent Access Tier in GB.
17+
monitored_objects: 0 # Total objects monitored by the Intelligent Tiering.
18+
monthly_tier_1_requests: 0 # Monthly PUT, COPY, POST, LIST requests (Tier 1).
19+
monthly_tier_2_requests: 0 # Monthly GET, SELECT, and all other requests (Tier 2).
20+
monthly_lifecycle_transition_requests: 0 # Monthly Lifecycle Transition requests.
21+
monthly_select_data_scanned_gb: 0 # Monthly data scanned by S3 Select in GB.
22+
monthly_select_data_returned_gb: 0 # Monthly data returned by S3 Select in GB.
23+
early_delete_gb: 0 # If an archive is deleted within 1 month of being uploaded, you will be charged an early deletion fee per GB.
24+
625
resource_type_default_usage:
726
aws_acmpca_certificate_authority:
827
monthly_requests: 6 # Monthly private certificate requests.
@@ -40,53 +59,53 @@ resource_type_default_usage:
4059
monthly_duration_secs: 62500 # Monthly duration of non-free handler operations that go above 30 seconds, in seconds.
4160
aws_cloudfront_distribution:
4261
monthly_data_transfer_to_internet_gb: # Monthly regional data transfer out to internet from the following, in GB:
43-
us: 60 # United States, Mexico, Canada
44-
europe: 60 # Europe, Israel
62+
us: 20 # United States, Mexico, Canada
63+
europe: 10 # Europe, Israel
4564
south_africa: 0 # South Africa, Kenya, Middle East
4665
south_america: 0 # South America
4766
japan: 0 # Japan
4867
australia: 0 # Australia, New Zealand
4968
asia_pacific: 0 # Hong Kong, Philippines, Singapore, South Korea, Taiwan, Thailand
5069
india: 0 # India
5170
monthly_data_transfer_to_origin_gb: # Monthly regional data transfer out to origin from the following, in GB:
52-
us: 250 # United States, Mexico, Canada
53-
europe: 250 # Europe, Israel
71+
us: 20 # United States, Mexico, Canada
72+
europe: 10 # Europe, Israel
5473
south_africa: 0 # South Africa, Kenya, Middle East
5574
south_america: 0 # South America
5675
japan: 0 # Japan
5776
australia: 0 # Australia, New Zealand
5877
asia_pacific: 0 # Hong Kong, Philippines, Singapore, South Korea, Taiwan, Thailand
5978
india: 0 # India
6079
monthly_http_requests: # Monthly number of HTTP requests to:
61-
us: 6750000 # United States, Mexico, Canada
62-
europe: 5750000 # Europe, Israel
80+
us: 100000 # United States, Mexico, Canada
81+
europe: 20000 # Europe, Israel
6382
south_africa: 0 # South Africa, Kenya, Middle East
6483
south_america: 0 # South America
6584
japan: 0 # Japan
6685
australia: 0 # Australia, New Zealand
6786
asia_pacific: 0 # Hong Kong, Philippines, Singapore, South Korea, Taiwan, Thailand
6887
india: 0 # India
6988
monthly_https_requests: # Monthly number of HTTPS requests to:
70-
us: 5000000 # United States, Mexico, Canada
71-
europe: 4250000 # Europe, Israel
89+
us: 200000 # United States, Mexico, Canada
90+
europe: 20000 # Europe, Israel
7291
south_africa: 0 # South Africa, Kenya, Middle East
7392
south_america: 0 # South America
7493
japan: 0 # Japan
7594
australia: 0 # Australia, New Zealand
7695
asia_pacific: 0 # Hong Kong, Philippines, Singapore, South Korea, Taiwan, Thailand
7796
india: 0 # India
7897
monthly_shield_requests: # Monthly number of shield requests to:
79-
us: 3125000 # United States
80-
europe: 3125000 # Europe
98+
us: 0 # United States
99+
europe: 0 # Europe
81100
south_america: 0 # South America
82101
japan: 0 # Japan
83102
australia: 0 # Australia
84103
singapore: 0 # Singapore
85104
south_korea: 0 # South Korea
86105
india: 0 # India
87106
monthly_invalidation_requests: 1250 # Monthly number of invalidation requests.
88-
monthly_encryption_requests: 2500000 # Monthly number of field level encryption requests.
89-
monthly_log_lines: 500000000 # Monthly number of real-time log lines.
107+
monthly_encryption_requests: 0 # Monthly number of field level encryption requests.
108+
monthly_log_lines: 0 # Monthly number of real-time log lines.
90109
custom_ssl_certificates: 0 # Number of dedicated IP custom SSL certificates.
91110
aws_cloudhsm_v2_hsm:
92111
monthly_hrs: 3.125 # Monthly number of hours the HSM ran for.
@@ -101,9 +120,9 @@ resource_type_default_usage:
101120
archive_storage_gb: 217 # Archive storage used for event replay in GB.
102121
monthly_schema_discovery_events: 50000000 # Monthly events ingested for schema discovery. Each 8 KB chunk of payload is billed as 1 event.
103122
aws_cloudwatch_log_group:
104-
storage_gb: 166 # Total data stored by CloudWatch logs in GB.
105-
monthly_data_ingested_gb: 10 # Monthly data ingested by CloudWatch logs in GB.
106-
monthly_data_scanned_gb: 1000 # Monthly data scanned by CloudWatch logs insights in GB.
123+
storage_gb: 2 # Total data stored by CloudWatch logs in GB.
124+
monthly_data_ingested_gb: 0.5 # Monthly data ingested by CloudWatch logs in GB.
125+
monthly_data_scanned_gb: 1 # Monthly data scanned by CloudWatch logs insights in GB.
107126
aws_codebuild_project:
108127
monthly_build_mins: 500 # Monthly total duration of builds in minutes. Each build is rounded up to the nearest minute.
109128
aws_config_config_rule:
@@ -117,7 +136,7 @@ resource_type_default_usage:
117136
monthly_rule_evaluations: 5000 # Monthly config rule evaluations.
118137
aws_db_instance:
119138
additional_backup_storage_gb: 238 # Amount of backup storage used that is in excess of 100% of the storage size for all databases in GB.
120-
monthly_standard_io_requests: 50000000 # Monthly number of input/output requests for database.
139+
monthly_standard_io_requests: 50000000 # Monthly number of input/output requests for database.
121140
monthly_additional_performance_insights_requests: 500000 # Monthly Performance Insights API requests above the 1000000 requests included in the free tier.
122141
aws_directory_service_directory:
123142
additional_domain_controllers: 0.135 # The number of domain controllers in the directory service provisioned in addition to the minimum 2 controllers
@@ -133,13 +152,13 @@ resource_type_default_usage:
133152
aws_dx_gateway_association:
134153
monthly_data_processed_gb: 250 # Monthly data processed by the DX gateway association per month in GB.
135154
aws_dynamodb_table:
136-
monthly_write_request_units: 4000000 # Monthly write request units in (used for on-demand DynamoDB).
137-
monthly_read_request_units: 20000000 # Monthly read request units in (used for on-demand DynamoDB).
138-
storage_gb: 20 # Total storage for tables in GB.
139-
pitr_backup_storage_gb: 25 # Total storage for Point-In-Time Recovery (PITR) backups in GB.
140-
on_demand_backup_storage_gb: 50 # Total storage for on-demand backups in GB.
141-
monthly_data_restored_gb: 33 # Monthly size of restored data in GB.
142-
monthly_streams_read_request_units: 25000000 # Monthly streams read request units.
155+
monthly_write_request_units: 150 # Monthly write request units in (used for on-demand DynamoDB).
156+
monthly_read_request_units: 300 # Monthly read request units in (used for on-demand DynamoDB).
157+
storage_gb: 3 # Total storage for tables in GB.
158+
pitr_backup_storage_gb: 6 # Total storage for Point-In-Time Recovery (PITR) backups in GB.
159+
on_demand_backup_storage_gb: 6 # Total storage for on-demand backups in GB.
160+
monthly_data_restored_gb: 3 # Monthly size of restored data in GB.
161+
monthly_streams_read_request_units: 700 # Monthly streams read request units.
143162
aws_ebs_snapshot:
144163
monthly_list_block_requests: 8333333 # Monthly number of ListChangedBlocks and ListSnapshotBlocks requests.
145164
monthly_get_block_requests: 1666666 # Monthly number of GetSnapshotBlock requests (block size is 512KiB).
@@ -193,7 +212,7 @@ resource_type_default_usage:
193212
monthly_on_demand_extended_retention_gb: 50 # Monthly data stored by the stream in GB that exceeds the 24 hour retention period but is less than 7 days.
194213
monthly_on_demand_long_term_retention_gb: 217 # Monthly data stored by the stream in GB that exceeds the 7 day retention period.
195214
monthly_provisioned_put_units: 357142857 # Monthly provisioned put units for the stream.
196-
monthly_provisioned_extended_retention_gb: 250 # Monthly data stored by the stream in GB that exceeds the 24 hour retention period but is less than 7 days.
215+
monthly_provisioned_extended_retention_gb: 250 # Monthly data stored by the stream in GB that exceeds the 24 hour retention period but is less than 7 days.
197216
monthly_provisioned_long_term_retention_gb: 217 # Monthly data stored by the stream in GB that exceeds the 7 day retention period.
198217
monthly_provisioned_long_term_retrieval_gb: 238 # Monthly data retrieved by the stream in GB that exceeds the 7 day retention period.
199218
monthly_provisioned_efo_data_out_gb: 384 # Monthly data egressed by the stream in GB to EFO consumers.
@@ -258,15 +277,15 @@ resource_type_default_usage:
258277
aws_s3_bucket_inventory:
259278
monthly_listed_objects: 2000000000 # Monthly number of listed objects.
260279
aws_s3_bucket:
261-
object_tags: 5000000 # Total object tags. Only for AWS provider V3.
280+
object_tags: 50000 # Total object tags. Only for AWS provider V3.
262281
standard: # Usages of S3 Standard:
263-
storage_gb: 225 # Total storage in GB.
264-
monthly_tier_1_requests: 1000000 # Monthly PUT, COPY, POST, LIST requests (Tier 1).
265-
monthly_tier_2_requests: 12500000 # Monthly GET, SELECT, and all other requests (Tier 2).
282+
storage_gb: 10 # Total storage in GB.
283+
monthly_tier_1_requests: 10000 # Monthly PUT, COPY, POST, LIST requests (Tier 1).
284+
monthly_tier_2_requests: 125000 # Monthly GET, SELECT, and all other requests (Tier 2).
266285
monthly_select_data_scanned_gb: 2500 # Monthly data scanned by S3 Select in GB.
267286
monthly_select_data_returned_gb: 7250 # Monthly data returned by S3 Select in GB.
268287
intelligent_tiering: # Usages of S3 Intelligent - Tiering:
269-
frequent_access_storage_gb: 0 # Total storage for Frequent Access Tier in GB.
288+
frequent_access_storage_gb: 30 # Total storage for Frequent Access Tier in GB.
270289
infrequent_access_storage_gb: 0 # Total storage for Infrequent Access Tier in GB.
271290
monitored_objects: 0 # Total objects monitored by the Intelligent Tiering.
272291
monthly_tier_1_requests: 0 # Monthly PUT, COPY, POST, LIST requests (Tier 1).
@@ -335,8 +354,8 @@ resource_type_default_usage:
335354
sms_subscriptions: 0 # Number of SMS subscriptions
336355
sms_notification_price: 0.002 # Average price for each SMS notification
337356
aws_sqs_queue:
338-
monthly_requests: 12500000 # Monthly requests to SQS.
339-
request_size_kb: 16 # Size of requests to SQS, billed in 64KB chunks. So 1M requests at 128KB uses 2M requests.
357+
monthly_requests: 50000 # Monthly requests to SQS.
358+
request_size_kb: 64 # Size of requests to SQS, billed in 64KB chunks. So 1M requests at 128KB uses 2M requests.
340359
aws_ssm_activation:
341360
instances: 1 # Number of instances being managed.
342361
aws_ssm_parameter:

onetime/events-expiration.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import json
2+
import boto3
3+
import time
4+
import logging
5+
from datetime import datetime, timezone
6+
from decimal import Decimal
7+
from botocore.exceptions import ClientError
8+
9+
# --- Configuration ---
# Name of the DynamoDB table whose items this script scans and updates.
TABLE_NAME = "infra-core-api-events"
# Number of days added to an event's last occurrence to obtain its expiry
# (four 365-day years).
EVENTS_EXPIRY_AFTER_LAST_OCCURRENCE_DAYS = 4 * 365

# --- Logging Setup ---
# Root logger at INFO with "timestamp - level - message" records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
17+
18+
19+
def parse_date_string(date_str: str) -> datetime | None:
20+
"""
21+
Parses an ISO 8601 date string into a timezone-aware datetime object.
22+
Returns None if the string is invalid or empty.
23+
"""
24+
if not date_str:
25+
return None
26+
try:
27+
if date_str.endswith("Z"):
28+
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
29+
dt_obj = datetime.fromisoformat(date_str)
30+
if dt_obj.tzinfo is None:
31+
return dt_obj.replace(tzinfo=timezone.utc)
32+
return dt_obj
33+
except ValueError:
34+
logging.warning(f"Could not parse invalid date string: {date_str}")
35+
return None
36+
37+
38+
def determine_expires_at(event: dict) -> int | None:
    """
    Work out the expiry timestamp (Unix seconds) for an event record.

    The retention window is EVENTS_EXPIRY_AFTER_LAST_OCCURRENCE_DAYS days:
      * a repeating event with no 'repeatEnds' gets no expiry (None);
      * otherwise the window is added to 'repeatEnds' (repeating events) or
        'end' (one-off events);
      * if that date is missing or unparsable, the window is added to the
        current time instead.

    Args:
        event: Mapping that may contain 'repeats', 'repeatEnds' and 'end'.

    Returns:
        The expiry as an integer Unix timestamp, or None for an open-ended
        repeating event.
    """
    is_repeating = event.get("repeats")
    if is_repeating and not event.get("repeatEnds"):
        return None

    retention_seconds = EVENTS_EXPIRY_AFTER_LAST_OCCURRENCE_DAYS * 86400
    fallback_expiry = int(time.time()) + retention_seconds

    if is_repeating:
        last_occurrence = event.get("repeatEnds")
    else:
        last_occurrence = event.get("end")

    # Only attempt a parse when there is a value to parse.
    last_occurrence_dt = (
        parse_date_string(last_occurrence) if last_occurrence else None
    )
    if not last_occurrence_dt:
        return fallback_expiry

    return round(last_occurrence_dt.timestamp()) + retention_seconds
61+
62+
63+
def process_table():
    """
    Scan TABLE_NAME and write an updated 'updatedAt' (and, when one applies,
    'expiresAt') to every item that has an 'id'.

    Each item is updated individually. A failed update is logged and counted
    but does not stop the run. AWS client errors and any other exception
    raised outside the per-item update are logged at CRITICAL level and
    swallowed, so the function always returns None.
    """
    try:
        dynamodb = boto3.resource("dynamodb")
        table = dynamodb.Table(TABLE_NAME)

        # A paginator is used to handle scanning tables of any size
        # NOTE(review): item values are used below as plain Python values
        # (e.g. 'id' is passed straight back as the key) - confirm the
        # resource's client returns them in that form.
        paginator = dynamodb.meta.client.get_paginator("scan")
        page_iterator = paginator.paginate(TableName=TABLE_NAME)

        item_count = 0
        updated_count = 0
        failed_count = 0
        logging.info(f"Starting to process table: {TABLE_NAME}")

        for page in page_iterator:
            for item in page.get("Items", []):
                item_count += 1
                pk_id = item.get("id", {})
                if not pk_id:
                    logging.warning(f"Skipping item with missing 'id': {item}")
                    continue

                # Prepare a simple dict for the logic function
                event_data = {
                    "repeats": item.get("repeats", {}),
                    "repeatEnds": item.get("repeatEnds", {}),
                    "end": item.get("end", {}),
                }

                expires_at_ts = determine_expires_at(event_data)

                # Prepare the update expression and values
                new_updated_at = (
                    datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
                )
                update_expression = "SET updatedAt = :ua"
                expression_attribute_values = {":ua": new_updated_at}

                if expires_at_ts is not None:
                    update_expression += ", expiresAt = :ea"
                    # DynamoDB requires numbers to be passed as Decimal objects
                    expression_attribute_values[":ea"] = Decimal(expires_at_ts)

                # Update the item in DynamoDB
                try:
                    table.update_item(
                        Key={"id": pk_id},
                        UpdateExpression=update_expression,
                        # UpdateItem creates the item when it does not exist.
                        # Requiring the key attribute to be present stops an
                        # item deleted after the scan read it from being
                        # re-created as a stub. The resulting conditional
                        # failure is a ClientError and is handled below.
                        ConditionExpression="attribute_exists(id)",
                        ExpressionAttributeValues=expression_attribute_values,
                    )
                    updated_count += 1
                    if updated_count % 100 == 0:
                        logging.info(
                            f"Processed {item_count} items, updated {updated_count} so far..."
                        )
                except ClientError as e:
                    failed_count += 1
                    logging.error(f"Failed to update item {pk_id}: {e}")

        logging.info("--- Script Finished ---")
        logging.info(f"Total items scanned: {item_count}")
        logging.info(f"Total items updated: {updated_count}")
        logging.info(f"Total items failed: {failed_count}")

    except ClientError as e:
        logging.critical(f"A critical AWS error occurred: {e}")
    except Exception as e:
        # Top-level boundary of the script: keep the traceback so the failure
        # can be diagnosed from the log alone.
        logging.critical(f"An unexpected error occurred: {e}", exc_info=True)


if __name__ == "__main__":
    process_table()

0 commit comments

Comments
 (0)