Skip to content

Commit 0397cea

Browse files
committed
Merge branch 'main' of github.com:LBHackney-IT/Data-Platform into fix-the-warning-message-of-terraform
2 parents bc3f98f + 5507983 commit 0397cea

File tree

53 files changed

+3136
-832
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+3136
-832
lines changed

docker/sql-to-parquet/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ RUN apk add --update --no-cache \
44
bash \
55
unzip \
66
mariadb-connector-c-dev \
7-
mysql mysql-client
7+
mariadb-client
88

99
# Install AWS CLI
1010
RUN apk add --no-cache \

docker/sql-to-parquet/entrypoint.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ echo "Unzipping file..."
5050
unzip "${FILENAME}".zip
5151

5252
echo "Dropping and recreating RDS database if it exists..."
53-
echo "DROP DATABASE IF EXISTS ${DBNAME}" | mysql ${MYSQL_CONN_PARAMS}
54-
echo "CREATE DATABASE IF NOT EXISTS ${DBNAME}" | mysql ${MYSQL_CONN_PARAMS}
53+
echo "DROP DATABASE IF EXISTS ${DBNAME}" | mariadb ${MYSQL_CONN_PARAMS}
54+
echo "CREATE DATABASE IF NOT EXISTS ${DBNAME}" | mariadb ${MYSQL_CONN_PARAMS}
5555

5656
echo "Running SQL from zip into RDS database..."
57-
mysql ${MYSQL_CONN_PARAMS} --database=${DBNAME} < *.sql
57+
mariadb ${MYSQL_CONN_PARAMS} --database=${DBNAME} < *.sql
5858

5959
echo "Taking snapshot of RDS database..."
6060
aws rds create-db-snapshot --db-instance-identifier "${RDS_INSTANCE_ID}" --db-snapshot-identifier "${SNAPSHOT_ID}"

lambdas/g_drive_folder_to_s3/Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ google-api-python-client = "*"
1111
boto3 = "*"
1212

1313
[requires]
14-
python_version = "3.10"
14+
python_version = "3.11"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
pandas==2.3.1
1+
pandas==2.3.2
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
requests==2.32.4
2-
httplib2==0.22.0
1+
requests==2.32.5
2+
httplib2==0.31.0
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
gspread==6.2.1
22
oauth2client==4.1.3
3-
google-api-python-client==2.176.0
3+
google-api-python-client==2.183.0
44
yagmail==0.15.293
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
s3fs==2025.7.0
1+
s3fs==2025.9.0

lambdas/rds_snapshot_export_s3_to_s3_copier/main.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,28 @@ def get_date_time(source_identifier: str) -> tuple[str, str, str, str]:
1919
source_identifier (str): source identifier taken from the
2020
event, as implemented this will include datetime for the snapshot as
2121
applicable in the form sql-to-parquet-yy-mm-dd-hhmmss or
22-
sql-to-parquet-yy-mm-dd-hhmmss-backdated
22+
sql-to-parquet-yyyy-mm-dd-backdated
2323
2424
Returns:
2525
tuple(str, str, str, str): year (yyyy), month (mm), day (dd), date (yyyymmdd)
2626
"""
2727

28-
pattern = r"^sql-to-parquet-(\d{2})-(\d{2})-(\d{2})-(\d{6})(-backdated)?$"
29-
30-
if not re.match(pattern, source_identifier):
28+
pattern_with_time = r"^sql-to-parquet-(\d{2})-(\d{2})-(\d{2})-(\d{6})$"
29+
pattern_backdated = r"^sql-to-parquet-(\d{4})-(\d{2})-(\d{2})-backdated$"
30+
31+
if re.match(pattern_with_time, source_identifier):
32+
split_identifier = source_identifier.split("-")
33+
day = split_identifier[5]
34+
month = split_identifier[4]
35+
year = "20" + split_identifier[3]
36+
elif re.match(pattern_backdated, source_identifier):
37+
split_identifier = source_identifier.split("-")
38+
day = split_identifier[5]
39+
month = split_identifier[4]
40+
year = split_identifier[3]
41+
else:
3142
raise ValueError("Invalid source identifier format")
3243

33-
split_identifier = source_identifier.split("-")
34-
day = split_identifier[5]
35-
month = split_identifier[4]
36-
year = "20" + split_identifier[3]
37-
3844
date = f"{year}{month}{day}"
3945
return year, month, day, date
4046

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import pytest
2+
from main import get_date_time
3+
4+
5+
class TestGetDateTime:
6+
def test_get_date_time_yy_format_with_time(self):
7+
source_identifier = "sql-to-parquet-23-12-25-143000"
8+
year, month, day, date = get_date_time(source_identifier)
9+
10+
assert year == "2023"
11+
assert month == "12"
12+
assert day == "25"
13+
assert date == "20231225"
14+
15+
def test_get_date_time_yyyy_format_backdated(self):
16+
source_identifier = "sql-to-parquet-2023-12-25-backdated"
17+
year, month, day, date = get_date_time(source_identifier)
18+
19+
assert year == "2023"
20+
assert month == "12"
21+
assert day == "25"
22+
assert date == "20231225"
23+
24+
def test_get_date_time_yyyy_format_backdated_different_date(self):
25+
source_identifier = "sql-to-parquet-2024-01-15-backdated"
26+
year, month, day, date = get_date_time(source_identifier)
27+
28+
assert year == "2024"
29+
assert month == "01"
30+
assert day == "15"
31+
assert date == "20240115"
32+
33+
def test_get_date_time_yy_format_different_time(self):
34+
source_identifier = "sql-to-parquet-24-03-10-090000"
35+
year, month, day, date = get_date_time(source_identifier)
36+
37+
assert year == "2024"
38+
assert month == "03"
39+
assert day == "10"
40+
assert date == "20240310"
41+
42+
def test_get_date_time_invalid_format_raises_error(self):
43+
invalid_identifiers = [
44+
"sql-to-parquet-2023-12-25", # Missing -backdated for yyyy format
45+
"sql-to-parquet-23-12-25", # Missing time for yy format
46+
"sql-to-parquet-23-12-25-143000-backdated", # Invalid: yy format cannot have -backdated
47+
"invalid-format-23-12-25-143000", # Wrong prefix
48+
"sql-to-parquet-23-12-25-14300", # Wrong time format (5 digits)
49+
"sql-to-parquet-23-12-25-1430000", # Wrong time format (7 digits)
50+
"sql-to-parquet-2023-12-backdated", # Missing day
51+
"sql-to-parquet-123-12-25-143000", # 3-digit year
52+
]
53+
54+
for invalid_id in invalid_identifiers:
55+
with pytest.raises(ValueError, match="Invalid source identifier format"):
56+
get_date_time(invalid_id)
57+
58+
def test_get_date_time_edge_cases(self):
59+
# Test zero-padded month/day values (01, 05) together with an all-zero time component
60+
source_identifier = "sql-to-parquet-23-01-05-000000"
61+
year, month, day, date = get_date_time(source_identifier)
62+
63+
assert year == "2023"
64+
assert month == "01"
65+
assert day == "05"
66+
assert date == "20230105"
67+
68+
def test_get_date_time_leap_year(self):
69+
# Test leap year date
70+
source_identifier = "sql-to-parquet-2024-02-29-backdated"
71+
year, month, day, date = get_date_time(source_identifier)
72+
73+
assert year == "2024"
74+
assert month == "02"
75+
assert day == "29"
76+
assert date == "20240229"

lambdas/set_budget_limit_amount/Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ name = "pypi"
99
boto3 = "*"
1010

1111
[requires]
12-
python_version = "3.9"
12+
python_version = "3.11"

0 commit comments

Comments
 (0)