
Commit 655bc2f

Update service range backfill function
1 parent d7bcde0 commit 655bc2f

2 files changed: +24 -23 lines changed

functions-python/backfill_dataset_service_date_range/src/main.py

Lines changed: 21 additions & 15 deletions
@@ -2,7 +2,7 @@
 import os
 import functions_framework
 
-from shared.helpers.logger import Logger
+from shared.helpers.logger import init_logger
 
 from shared.database.database import with_db_session, refresh_materialized_view
 
@@ -28,7 +28,7 @@
 env = os.getenv("ENV", "dev").lower()
 bucket_name = f"mobilitydata-datasets-{env}"
 
-logging.basicConfig(level=logging.INFO)
+init_logger()
 
 
 def is_version_gte(target_version: str, version_field):
@@ -66,10 +66,10 @@ def backfill_datasets(session: "Session"):
         )
     ).all()
 
-    logging.info(f"Found {len(datasets)} datasets to process.")
+    logging.info("Found %s datasets to process.", len(datasets))
 
     for dataset in datasets:
-        logging.info(f"Processing gtfsdataset ID {dataset.stable_id}")
+        logging.info("Processing gtfsdataset ID %s", dataset.stable_id)
         gtfsdataset_id = dataset.stable_id
         feed_stable_id = "-".join(gtfsdataset_id.split("-")[0:2])
         # Get the latest validation report for the dataset
@@ -81,7 +81,8 @@ def backfill_datasets(session: "Session"):
 
         if not latest_validation_report:
             logging.info(
-                f"Skipping gtfsdataset ID {gtfsdataset_id}: no validation reports found."
+                "Skipping gtfsdataset ID %s: no validation reports found.",
+                gtfsdataset_id,
             )
             continue
 
@@ -90,7 +91,7 @@
         try:
             # Download the JSON report
             blob_url = f"{feed_stable_id}/{gtfsdataset_id}/report_{latest_validation_report.validator_version}.json"
-            logging.info("Blob URL: " + blob_url)
+            logging.info("Blob URL: %s", blob_url)
             dataset_blob = bucket.blob(blob_url)
             if not dataset_blob.exists():
                 logging.info("Blob not found, downloading from URL")
@@ -102,7 +103,9 @@
                 logging.info("Blob found, downloading from blob")
                 json_data = json.loads(dataset_blob.download_as_string())
         except Exception as e:
-            logging.error(f"Error downloading blob: {e} trying json report url")
+            logging.error(
+                "Error downloading blob, trying JSON report URL: %s", e
+            )
             response = requests.get(json_report_url)
             response.raise_for_status()
             json_data = response.json()
@@ -133,7 +136,9 @@
 
             formatted_dates = f"{utc_service_start_date:%Y-%m-%d %H:%M} - {utc_service_end_date:%Y-%m-%d %H:%M}"
             logging.info(
-                f"Updated gtfsdataset ID {gtfsdataset_id} with value: {formatted_dates}"
+                "Updated gtfsdataset ID %s with value: %s",
+                gtfsdataset_id,
+                formatted_dates,
             )
             total_changes_count += 1
             changes_count += 1
@@ -151,32 +156,31 @@ def backfill_datasets(session: "Session"):
 
         except requests.RequestException as e:
             logging.error(
-                f"Error downloading JSON for gtfsdataset ID {gtfsdataset_id}: {e}"
+                "Error downloading JSON for gtfsdataset ID %s: %s", gtfsdataset_id, e
             )
         except json.JSONDecodeError as e:
             logging.error(
-                f"Error parsing JSON for gtfsdataset ID {gtfsdataset_id}: {e}"
+                "Error parsing JSON for gtfsdataset ID %s: %s", gtfsdataset_id, e
             )
         except Exception as e:
-            logging.error(f"Error processing gtfsdataset ID {gtfsdataset_id}: {e}")
+            logging.error("Error processing gtfsdataset ID %s: %s", gtfsdataset_id, e)
 
     try:
         session.commit()
         logging.info("Database changes committed.")
         session.close()
         return total_changes_count
     except Exception as e:
-        logging.error("Error committing changes:", e)
+        logging.error("Error committing changes: %s", e)
         session.rollback()
         session.close()
-        raise Exception(f"Error creating dataset: {e}")
+        raise Exception("Error creating dataset: %s" % e)
 
 
 @functions_framework.http
 @with_db_session
 def backfill_dataset_service_date_range(_, db_session: Session):
     """Fills gtfs dataset service date range from the latest validation report."""
-    Logger.init_logger()
     change_count = 0
     try:
         logging.info("Database session started.")
@@ -186,4 +190,6 @@ def backfill_dataset_service_date_range(_, db_session: Session):
         logging.error(f"Error setting the datasets service date range values: {error}")
         return f"Error setting the datasets service date range values: {error}", 500
 
-    return f"Script executed successfully. {change_count} datasets updated", 200
+    result = f"Script executed successfully. {change_count} datasets updated"
+    logging.info(result)
+    return result, 200
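
Note on the logging changes above: the recurring edit replaces f-string log messages with lazy %-style arguments. An f-string is rendered before logging.info is even called, so the message is built whether or not the record passes the level filter; with %-style arguments, logging interpolates only when the record is actually emitted. A minimal standalone sketch of the difference (illustrative, not from this repository):

import logging

logging.basicConfig(level=logging.WARNING)  # INFO records are filtered out
logger = logging.getLogger(__name__)


class Expensive:
    # Stands in for a value that is costly to render.
    def __str__(self) -> str:
        print("rendered!")
        return "expensive-value"


# f-string form: Expensive.__str__ runs here even though the record is dropped.
logger.info(f"value: {Expensive()}")

# Lazy form: the record is dropped before formatting, so __str__ never runs.
logger.info("value: %s", Expensive())

This deferral is a feature of the logging module only; a bare raise Exception("msg: %s", e) would never interpolate its arguments, which is why the raise in the commit block keeps explicit string formatting.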

functions-python/backfill_dataset_service_date_range/tests/test_backfill_dataset_service_date_range_main.py

Lines changed: 3 additions & 8 deletions
@@ -164,10 +164,9 @@ def test_backfill_datasets_service_date_range_swap(mock_get, mock_storage_client
     mock_session.commit.assert_called_once()
 
 
-@patch("logging.error", autospec=True)
 @patch("google.cloud.storage.Client", autospec=True)
 @patch("requests.get")
-def test_backfill_datasets_error_commit(mock_get, mock_storage_client, mock_logger):
+def test_backfill_datasets_error_commit(mock_get, mock_storage_client):
     # Mock the storage client and bucket
     mock_bucket = MagicMock()
     mock_client_instance = mock_storage_client.return_value
@@ -412,9 +411,8 @@ def test_backfill_datasets_fail_to_get_validation_report(mock_get, mock_storage_
     mock_session.commit.assert_called_once()
 
 
-@patch("main.Logger", autospec=True)
 @patch("main.backfill_datasets")
-def test_backfill_dataset_service_date_range(mock_backfill_datasets, mock_logger):
+def test_backfill_dataset_service_date_range(mock_backfill_datasets):
     mock_backfill_datasets.return_value = 5
 
     with patch.dict(os.environ, {"FEEDS_DATABASE_URL": default_db_url}):
@@ -425,11 +423,8 @@ def test_backfill_dataset_service_date_range(mock_backfill_datasets, mock_logger
     assert status_code == 200
 
 
-@patch("main.Logger", autospec=True)
 @patch("main.backfill_datasets")
-def test_backfill_dataset_service_date_range_error_raised(
-    mock_backfill_datasets, mock_logger
-):
+def test_backfill_dataset_service_date_range_error_raised(mock_backfill_datasets):
     mock_backfill_datasets.side_effect = Exception("Mocked exception")
 
     with patch.dict(os.environ, {"FEEDS_DATABASE_URL": default_db_url}):
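
Note on the test changes above: since main.py no longer references Logger, the @patch("main.Logger") decorators and their matching mock parameters can be dropped together. Stacked @patch decorators are applied bottom-up, so the innermost decorator supplies the first mock argument and the outermost the last; removing one decorator removes exactly one parameter. A minimal standalone sketch of the ordering rule (hypothetical patch targets, for illustration only):

import os
from unittest import mock


@mock.patch("os.getcwd")  # outermost decorator -> last mock argument
@mock.patch("os.getpid")  # innermost decorator -> first mock argument
def test_patch_ordering(mock_getpid, mock_getcwd):
    mock_getpid.return_value = 4242
    mock_getcwd.return_value = "/tmp/example"
    assert os.getpid() == 4242
    assert os.getcwd() == "/tmp/example"


test_patch_ordering()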
