Skip to content

Commit d639dae

Browse files
committed
timezone.py independence
1 parent 6052ebd commit d639dae

File tree

5 files changed

+129
-75
lines changed

5 files changed

+129
-75
lines changed

functions-python/backfill_dataset_service_date_range/src/main.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from sqlalchemy.orm import joinedload
1111
from sqlalchemy import or_, func
1212
from shared.helpers.database import refresh_materialized_view
13+
from shared.helpers.transform import get_nested_value
1314
from shared.helpers.timezone import (
1415
extract_timezone_from_json_validation_report,
1516
get_service_date_range_with_timezone_utc,
@@ -111,17 +112,27 @@ def backfill_datasets(session: "Session"):
111112
response.raise_for_status()
112113
json_data = response.json()
113114

115+
formatting_timezone = extract_timezone_from_json_validation_report(
116+
json_data
117+
)
118+
feed_service_window_start = get_nested_value(
119+
json_data, ["summary", "feedInfo", "feedServiceWindowStart"]
120+
)
121+
feed_service_window_end = get_nested_value(
122+
json_data, ["summary", "feedInfo", "feedServiceWindowEnd"]
123+
)
124+
114125
if (
115-
result := get_service_date_range_with_timezone_utc(json_data)
126+
result := get_service_date_range_with_timezone_utc(
127+
feed_service_window_start,
128+
feed_service_window_end,
129+
formatting_timezone,
130+
)
116131
) is not None:
117132
utc_service_start_date, utc_service_end_date = result
118133
dataset.service_date_range_start = utc_service_start_date
119134
dataset.service_date_range_end = utc_service_end_date
120135

121-
formatting_timezone = extract_timezone_from_json_validation_report(
122-
json_data
123-
)
124-
125136
if formatting_timezone is not None:
126137
dataset.agency_timezone = formatting_timezone
127138

functions-python/backfill_dataset_service_date_range/tests/test_backfill_dataset_service_date_range_main.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,16 @@ def test_backfill_datasets_service_date_range_swap(mock_get, mock_storage_client
148148
changes_count = backfill_datasets(mock_session)
149149

150150
assert changes_count == 1
151-
assert mock_dataset.service_date_range_start == "2023-01-01"
152-
assert mock_dataset.service_date_range_end == "2023-12-31"
151+
152+
expected_range_start = datetime.strptime("2023-01-01", "%Y-%m-%d").replace(
153+
hour=0, minute=0, tzinfo=timezone.utc
154+
)
155+
expected_range_end = datetime.strptime("2023-12-31", "%Y-%m-%d").replace(
156+
hour=23, minute=59, tzinfo=timezone.utc
157+
)
158+
159+
assert mock_dataset.service_date_range_start == expected_range_start
160+
assert mock_dataset.service_date_range_end == expected_range_end
153161
mock_get.assert_called_once_with(
154162
"http://example-1.com/report.json"
155163
) # latest validation report

functions-python/helpers/tests/test_timezone.py

Lines changed: 75 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -58,65 +58,99 @@ def test_multiple_agencies(self):
5858

5959
class TestGetServiceDateRangeWithTimezoneUTC(unittest.TestCase):
6060
def test_valid_dates_with_timezone(self):
61-
json_report = {
62-
"summary": {
63-
"feedInfo": {
64-
"feedServiceWindowStart": "2025-03-04",
65-
"feedServiceWindowEnd": "2025-03-10",
66-
},
67-
"agencies": [{"timezone": "Asia/Tokyo"}],
68-
}
69-
}
70-
result = get_service_date_range_with_timezone_utc(json_report)
61+
feed_service_window_start = "2025-03-04"
62+
feed_service_window_end = "2025-03-10"
63+
timezone = "Asia/Tokyo"
64+
65+
result = get_service_date_range_with_timezone_utc(
66+
feed_service_window_start, feed_service_window_end, timezone
67+
)
68+
7169
expected_start = datetime(
7270
2025, 3, 3, 15, 0, tzinfo=ZoneInfo("UTC")
7371
) # Asia/Tokyo is UTC+9
74-
expected_end = datetime(
75-
2025, 3, 10, 14, 59, tzinfo=ZoneInfo("UTC")
76-
) # Asia/Tokyo is UTC-9
72+
expected_end = datetime(2025, 3, 10, 14, 59, tzinfo=ZoneInfo("UTC"))
7773
self.assertEqual(result, [expected_start, expected_end])
7874

7975
def test_valid_dates_with_utc(self):
80-
json_report = {
81-
"summary": {
82-
"feedInfo": {
83-
"feedServiceWindowStart": "2025-03-01",
84-
"feedServiceWindowEnd": "2025-03-10",
85-
}
86-
}
87-
}
88-
result = get_service_date_range_with_timezone_utc(json_report)
76+
feed_service_window_start = "2025-03-01"
77+
feed_service_window_end = "2025-03-10"
78+
timezone = "UTC"
79+
80+
result = get_service_date_range_with_timezone_utc(
81+
feed_service_window_start, feed_service_window_end, timezone
82+
)
83+
8984
expected_start = datetime(2025, 3, 1, 0, 0, tzinfo=ZoneInfo("UTC"))
9085
expected_end = datetime(2025, 3, 10, 23, 59, tzinfo=ZoneInfo("UTC"))
9186
self.assertEqual(result, [expected_start, expected_end])
9287

9388
def test_missing_feed_service_window_start(self):
94-
json_report = {"summary": {"feedInfo": {"feedServiceWindowEnd": "2025-03-10"}}}
95-
result = get_service_date_range_with_timezone_utc(json_report)
89+
feed_service_window_start = None
90+
feed_service_window_end = "2025-03-10"
91+
timezone = "UTC"
92+
93+
result = get_service_date_range_with_timezone_utc(
94+
feed_service_window_start, feed_service_window_end, timezone
95+
)
96+
9697
self.assertIsNone(result)
9798

9899
def test_missing_feed_service_window_end(self):
99-
json_report = {
100-
"summary": {"feedInfo": {"feedServiceWindowStart": "2025-03-01"}}
101-
}
102-
result = get_service_date_range_with_timezone_utc(json_report)
100+
feed_service_window_start = "2025-03-01"
101+
feed_service_window_end = None
102+
timezone = "UTC"
103+
104+
result = get_service_date_range_with_timezone_utc(
105+
feed_service_window_start, feed_service_window_end, timezone
106+
)
107+
103108
self.assertIsNone(result)
104109

105-
def test_invalid_date_format(self):
106-
json_report = {
107-
"summary": {
108-
"feedInfo": {
109-
"feedServiceWindowStart": "03-01-2025", # Wrong format (MM-DD-YYYY)
110-
"feedServiceWindowEnd": "2025-03-10",
111-
}
112-
}
113-
}
114-
result = get_service_date_range_with_timezone_utc(json_report)
110+
def test_invalid_date_format_start(self):
111+
feed_service_window_start = "03-01-2025" # Invalid format (MM-DD-YYYY)
112+
feed_service_window_end = "2025-03-10"
113+
timezone = "UTC"
114+
115+
result = get_service_date_range_with_timezone_utc(
116+
feed_service_window_start, feed_service_window_end, timezone
117+
)
118+
119+
self.assertIsNone(result)
120+
121+
def test_invalid_date_format_end(self):
122+
feed_service_window_start = "2025-03-01"
123+
feed_service_window_end = "03-10-2025" # Invalid format (MM-DD-YYYY)
124+
timezone = "UTC"
125+
126+
result = get_service_date_range_with_timezone_utc(
127+
feed_service_window_start, feed_service_window_end, timezone
128+
)
129+
115130
self.assertIsNone(result)
116131

117-
def test_missing_summary_feedinfo(self):
118-
json_report = {"summary": {}}
119-
result = get_service_date_range_with_timezone_utc(json_report)
132+
def test_default_timezone_utc(self):
133+
feed_service_window_start = "2025-03-01"
134+
feed_service_window_end = "2025-03-10"
135+
timezone = None # None should default to UTC
136+
137+
result = get_service_date_range_with_timezone_utc(
138+
feed_service_window_start, feed_service_window_end, timezone
139+
)
140+
141+
expected_start = datetime(2025, 3, 1, 0, 0, tzinfo=ZoneInfo("UTC"))
142+
expected_end = datetime(2025, 3, 10, 23, 59, tzinfo=ZoneInfo("UTC"))
143+
self.assertEqual(result, [expected_start, expected_end])
144+
145+
def test_invalid_service_date_range(self):
146+
feed_service_window_start = None
147+
feed_service_window_end = None
148+
timezone = "UTC"
149+
150+
result = get_service_date_range_with_timezone_utc(
151+
feed_service_window_start, feed_service_window_end, timezone
152+
)
153+
120154
self.assertIsNone(result)
121155

122156

functions-python/helpers/timezone.py

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from zoneinfo import available_timezones
22
from datetime import datetime
3-
from transform import get_nested_value
43
from zoneinfo import ZoneInfo
54
from typing import List, Optional
65
import logging
@@ -39,25 +38,15 @@ def extract_timezone_from_json_validation_report(json_data: dict) -> Optional[st
3938
return extracted_timezone
4039

4140

42-
# This function is used to extract the service date range from the JSON report
43-
# Return the service date range in UTC timezone as a list: [start_date, end_date]
44-
def get_service_date_range_with_timezone_utc(json_report) -> Optional[List[datetime]]:
41+
def get_service_date_range_with_timezone_utc(
42+
feed_service_window_start, feed_service_window_end, timezone
43+
) -> Optional[List[datetime]]:
4544
"""
46-
Populates the service date range of the dataset based on the JSON report.
47-
The service date range is extracted from the feedServiceWindowStart and feedServiceWindowEnd fields,
48-
if both are present and not empty.
45+
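Takes the service window start and end dates as %Y-%m-%d strings, interprets them in the given timezone (UTC when timezone is None), and returns them as [start_date, end_date] in UTC; returns None if either date is missing.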
4946
"""
50-
51-
timezone = extract_timezone_from_json_validation_report(json_report)
5247
if timezone is None:
5348
timezone = "UTC"
5449

55-
feed_service_window_start = get_nested_value(
56-
json_report, ["summary", "feedInfo", "feedServiceWindowStart"]
57-
)
58-
feed_service_window_end = get_nested_value(
59-
json_report, ["summary", "feedInfo", "feedServiceWindowEnd"]
60-
)
6150
if feed_service_window_start and feed_service_window_end:
6251
# service date range is found
6352
formatted_service_start_date = None
@@ -88,6 +77,14 @@ def get_service_date_range_with_timezone_utc(json_report) -> Optional[List[datet
8877
)
8978
return None
9079

80+
# this check is due to an issue in the validation report
81+
# where the start date could be later than the end date
82+
if formatted_service_start_date > formatted_service_end_date:
83+
formatted_service_start_date, formatted_service_end_date = (
84+
formatted_service_end_date,
85+
formatted_service_start_date,
86+
)
87+
9188
local_service_start_date = formatted_service_start_date.replace(
9289
hour=0, minute=0, tzinfo=ZoneInfo(timezone)
9390
)
@@ -98,13 +95,8 @@ def get_service_date_range_with_timezone_utc(json_report) -> Optional[List[datet
9895
)
9996
utc_service_end_date = local_service_end_date.astimezone(ZoneInfo("UTC"))
10097

101-
# this check is due to an issue in the validation report
102-
# where the start date could be later than the end date
103-
if utc_service_start_date > utc_service_end_date:
104-
return [utc_service_end_date, utc_service_start_date]
105-
else:
106-
return [utc_service_start_date, utc_service_end_date]
98+
return [utc_service_start_date, utc_service_end_date]
10799

108100
else:
109-
logging.error("service date range not found in json_report")
101+
logging.error("service date range not found")
110102
return None

functions-python/process_validation_report/src/main.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ def generate_report_entities(
154154
dataset = get_dataset(dataset_stable_id, session)
155155
dataset.validation_reports.append(validation_report_entity)
156156

157-
populate_service_date(dataset, json_report)
158-
159157
extracted_timezone = extract_timezone_from_json_validation_report(json_report)
160158
if extracted_timezone is not None:
161159
dataset.agency_timezone = extracted_timezone
162160

161+
populate_service_date(dataset, json_report, extracted_timezone)
162+
163163
for feature_name in get_nested_value(json_report, ["summary", "gtfsFeatures"], []):
164164
feature = get_feature(feature_name, session)
165165
feature.validations.append(validation_report_entity)
@@ -177,14 +177,23 @@ def generate_report_entities(
177177
return entities
178178

179179

180-
def populate_service_date(dataset, json_report):
180+
def populate_service_date(dataset, json_report, timezone=None):
181181
"""
182182
Populates the service date range of the dataset based on the JSON report.
183183
The service date range is extracted from the feedServiceWindowStart and feedServiceWindowEnd fields,
184184
if both are present and not empty.
185185
"""
186-
187-
if (result := get_service_date_range_with_timezone_utc(json_report)) is not None:
186+
feed_service_window_start = get_nested_value(
187+
json_report, ["summary", "feedInfo", "feedServiceWindowStart"]
188+
)
189+
feed_service_window_end = get_nested_value(
190+
json_report, ["summary", "feedInfo", "feedServiceWindowEnd"]
191+
)
192+
if (
193+
result := get_service_date_range_with_timezone_utc(
194+
feed_service_window_start, feed_service_window_end, timezone
195+
)
196+
) is not None:
188197
utc_service_start_date, utc_service_end_date = result
189198
dataset.service_date_range_start = utc_service_start_date
190199
dataset.service_date_range_end = utc_service_end_date

0 commit comments

Comments
 (0)