Skip to content

Commit 5f88da1

Browse files
amend get_date_time and tests
1 parent 94a51a7 commit 5f88da1

File tree

2 files changed

+100
-9
lines changed

2 files changed

+100
-9
lines changed

lambdas/rds_snapshot_export_s3_to_s3_copier/main.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,28 @@ def get_date_time(source_identifier: str) -> tuple[str, str, str, str]:
1919
source_identifier (str): source identifier taken from the
2020
event, as implemented this will include datetime for the snapshot as
2121
applicable in the form sql-to-parquet-yy-mm-dd-hhmmss or
22-
sql-to-parquet-yy-mm-dd-hhmmss-backdated
22+
sql-to-parquet-yyyy-mm-dd-backdated
2323
2424
Returns:
2525
tuple(str, str, str, str): year, month, day, date
2626
"""
2727

28-
pattern = r"^sql-to-parquet-(\d{2})-(\d{2})-(\d{2})-(\d{6})(-backdated)?$"
29-
30-
if not re.match(pattern, source_identifier):
28+
pattern_with_time = r"^sql-to-parquet-(\d{2})-(\d{2})-(\d{2})-(\d{6})(-backdated)?$"
29+
pattern_backdated = r"^sql-to-parquet-(\d{4})-(\d{2})-(\d{2})-backdated$"
30+
31+
if re.match(pattern_with_time, source_identifier):
32+
split_identifier = source_identifier.split("-")
33+
day = split_identifier[5]
34+
month = split_identifier[4]
35+
year = "20" + split_identifier[3]
36+
elif re.match(pattern_backdated, source_identifier):
37+
split_identifier = source_identifier.split("-")
38+
day = split_identifier[5]
39+
month = split_identifier[4]
40+
year = split_identifier[3]
41+
else:
3142
raise ValueError("Invalid source identifier format")
3243

33-
split_identifier = source_identifier.split("-")
34-
day = split_identifier[5]
35-
month = split_identifier[4]
36-
year = "20" + split_identifier[3]
37-
3844
date = f"{year}{month}{day}"
3945
return year, month, day, date
4046

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import pytest
2+
from main import get_date_time
3+
4+
5+
class TestGetDateTime:
6+
7+
def test_get_date_time_yy_format_with_time(self):
8+
source_identifier = "sql-to-parquet-23-12-25-143000"
9+
year, month, day, date = get_date_time(source_identifier)
10+
11+
assert year == "2023"
12+
assert month == "12"
13+
assert day == "25"
14+
assert date == "20231225"
15+
16+
def test_get_date_time_yy_format_with_time_backdated(self):
17+
source_identifier = "sql-to-parquet-23-12-25-143000-backdated"
18+
year, month, day, date = get_date_time(source_identifier)
19+
20+
assert year == "2023"
21+
assert month == "12"
22+
assert day == "25"
23+
assert date == "20231225"
24+
25+
def test_get_date_time_yyyy_format_backdated(self):
26+
source_identifier = "sql-to-parquet-2023-12-25-backdated"
27+
year, month, day, date = get_date_time(source_identifier)
28+
29+
assert year == "2023"
30+
assert month == "12"
31+
assert day == "25"
32+
assert date == "20231225"
33+
34+
def test_get_date_time_yyyy_format_backdated_different_date(self):
35+
source_identifier = "sql-to-parquet-2024-01-15-backdated"
36+
year, month, day, date = get_date_time(source_identifier)
37+
38+
assert year == "2024"
39+
assert month == "01"
40+
assert day == "15"
41+
assert date == "20240115"
42+
43+
def test_get_date_time_yy_format_different_time(self):
44+
source_identifier = "sql-to-parquet-24-03-10-090000"
45+
year, month, day, date = get_date_time(source_identifier)
46+
47+
assert year == "2024"
48+
assert month == "03"
49+
assert day == "10"
50+
assert date == "20240310"
51+
52+
def test_get_date_time_invalid_format_raises_error(self):
53+
invalid_identifiers = [
54+
"sql-to-parquet-2023-12-25", # Missing -backdated for yyyy format
55+
"sql-to-parquet-23-12-25", # Missing time for yy format
56+
"invalid-format-23-12-25-143000", # Wrong prefix
57+
"sql-to-parquet-23-12-25-14300", # Wrong time format (5 digits)
58+
"sql-to-parquet-23-12-25-1430000", # Wrong time format (7 digits)
59+
"sql-to-parquet-2023-12-backdated", # Missing day
60+
"sql-to-parquet-123-12-25-143000", # 3-digit year
61+
]
62+
63+
for invalid_id in invalid_identifiers:
64+
with pytest.raises(ValueError, match="Invalid source identifier format"):
65+
get_date_time(invalid_id)
66+
67+
def test_get_date_time_edge_cases(self):
68+
# Test with single digit month/day (should still work with zero padding)
69+
source_identifier = "sql-to-parquet-23-01-05-000000"
70+
year, month, day, date = get_date_time(source_identifier)
71+
72+
assert year == "2023"
73+
assert month == "01"
74+
assert day == "05"
75+
assert date == "20230105"
76+
77+
def test_get_date_time_leap_year(self):
78+
# Test leap year date
79+
source_identifier = "sql-to-parquet-2024-02-29-backdated"
80+
year, month, day, date = get_date_time(source_identifier)
81+
82+
assert year == "2024"
83+
assert month == "02"
84+
assert day == "29"
85+
assert date == "20240229"

0 commit comments

Comments
 (0)